Merge branch 'develop' into develop

98264f0a · smilejames · GitHub · b948a016 · 8b31ac5a · 98264f0a
7 changed files
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "fpga/fpga_quantilization.h"
+#include <algorithm>
+namespace paddle_mobile {
+namespace fpga {
+// Copies `num` images from channel-major (CHW) order in `data_in` to
+// channel-minor (HWC) order in `data_out`.
+//
+// data_in  : source buffer, num * channel * height * width elements, read
+//            sequentially.
+// data_out : destination buffer of the same size; must not overlap data_in.
+// num      : number of images (the N of NCHW).
+// channel, height, width : extent of each image.
+template <typename Dtype>
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width) {
+  const int amount_per_row = width * channel;
+  const int amount_per_image = height * amount_per_row;
+  for (int n = 0; n < num; n++) {
+    for (int c = 0; c < channel; c++) {
+      for (int h = 0; h < height; h++) {
+        const int offset_height = h * amount_per_row;
+        for (int w = 0; w < width; w++) {
+          // Element (c, h, w) of the source lands at (h, w, c) in the output.
+          *(data_out + offset_height + w * channel + c) = *(data_in++);
+        }
+      }
+    }
+    // Step over the whole image just written; stepping by `num` would make
+    // consecutive images overwrite each other.
+    data_out += amount_per_image;
+  }
+}
+// Returns the largest absolute value among the first `num` elements of
+// `data`, or 0 when `num` is not positive.
+//
+// The magnitude (not the signed maximum) is what a symmetric quantization
+// scale needs: a buffer whose extreme value is negative would otherwise yield
+// a scale that is too small, or zero.
+//
+// NOTE(review): for a signed integer Dtype the most negative value has no
+// representable magnitude, so it cannot be reported correctly.
+template <typename Dtype>
+static Dtype find_max(Dtype* data, int num) {
+  Dtype max = 0;
+  for (int i = 0; i < num; ++i) {
+    const Dtype value = data[i];
+    const Dtype magnitude = static_cast<Dtype>(value < 0 ? -value : value);
+    max = std::max(max, magnitude);
+  }
+  return max;
+}
+// Quantizes a 4-D filter tensor to int8 and rearranges its data from NCHW to
+// NHWC order.
+//
+// A float filter is converted into a newly allocated tensor: the input tensor
+// is left untouched and the caller takes ownership of the returned one. Any
+// other filter is treated as already holding int8 data and is rearranged in
+// place, in which case `filter` itself is returned.
+//
+// The scale (largest value reported by find_max divided by 127) is written
+// through the returned tensor's fpga_args().scale_pointer().
+//
+// NOTE(review): values are divided by `scale` so that they span [-127, 127];
+// confirm this matches the convention the consumer of the stored scale uses.
+template <typename Dtype>
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+  const int batch_size = filter->dims()[0];
+  const int channel = filter->dims()[1];
+  const int height = filter->dims()[2];
+  const int width = filter->dims()[3];
+  const int numel = filter->numel();
+  float scale = 0;
+  framework::Tensor* result = filter;
+  int8_t* int_data = nullptr;
+  // Scratch copy of the 8-bit values, still in NCHW order.
+  int8_t* tmp_data = new int8_t[numel];
+  // 32bit filter -> 8bit filter;
+  if (filter->type() == typeid(float)) {
+    float* float_data = filter->data<float>();
+    const float max = find_max(float_data, numel);
+    scale = (max / fix_range);
+    for (int i = 0; i < numel; ++i) {
+      if (scale > 0) {
+        // Scale first, then clamp, and only then narrow: converting an
+        // out-of-range float to int8_t is undefined behaviour.
+        const float scaled = float_data[i] / scale;
+        tmp_data[i] = static_cast<int8_t>(
+            std::max(-fix_range, std::min(fix_range, scaled)));
+      } else {
+        // No positive maximum (e.g. an all-zero filter): avoid dividing by 0.
+        tmp_data[i] = 0;
+      }
+    }
+    result = new framework::Tensor();
+    // Give the new tensor the input's dims so its buffer has the right size.
+    int_data = result->mutable_data<int8_t>(filter->dims());
+  } else {
+    const int8_t* source = filter->data<int8_t>();
+    const int8_t max = find_max(filter->data<int8_t>(), numel);
+    scale = (max / fix_range);
+    std::copy(source, source + numel, tmp_data);
+    int_data = filter->mutable_data<int8_t>();
+  }
+  // NCHW -> NHWC;
+  chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
+  // Allocated with new[], so it must be released with delete[].
+  delete[] tmp_data;
+  *(result->fpga_args().scale_pointer()) = scale;
+  return result;
+}
+// The header only declares this template, so the definition has to be
+// instantiated in this translation unit for other files to link against it.
+template framework::Tensor* quantilize_filter<float>(framework::Tensor* filter);
+}  // namespace fpga
+}  // namespace paddle_mobile
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -18,35 +18,13 @@ limitations under the License. */
 #include "framework/tensor.h"
 namespace paddle_mobile {
+namespace fpga {
 template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
-  float scale = 0;
+                       int height, int width);
-  // 32bit filter -> 8bit filter;
-  float min = 0f;
-  float max = 0f;
-  if (filter->type() == typeid(float)) {
-    float* floatData = originalFilter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      min = std::min(min, floatData[i]);
-      max = std::max(max, floatData[i]);
-    }
-    float fix_range = (float)((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
-    framework::Tensor* originalFilter = filter;
-    framework::Tensor* quantFilter = new framework::Tensor();
-    int8_t* intData = quantFilter->mutable_data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      intData[i] = (int8_t)floatData[i] * scale;
-    }
-    quantFilter.scale = scale;
-    // NCHW -> NHWC;
-    return quantFilter;
-  }
-  return filter;
-}
+template <typename Dtype>
+framework::Tensor* quantilize_filter(framework::Tensor* filter);
+}  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -36,18 +36,18 @@ void ConcatKernel<FPGA, float>::Compute(const ConcatParam &param) const {
  auto out_channel = out_dim[3];
  auto out_offset = 0;
  for (int i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
    auto channels = input->dims()[3];
    out_offset += channels;
    auto src = input->data<half>();
    for (int j = 0; j < pixels; ++j) {
-      auto dst = out->data<half>() + out_offset;
+      auto dst = out->mutable_data<half>() + out_offset;
      memory::Copy(dst, src, sizeof(half));
    }
  }
 }
+template class ConcatKernel<FPGA, float>;
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "operators/kernel/conv_add_bn_kernel.h"
 #include "fpga/api/fpga_api.h"
+#include "fpga/quantilization.h"
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +29,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();
  auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -41,7 +42,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
                        "Image channel should be equal to bias number");
  const int channel = input->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  float *bs_ptr =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
@@ -58,26 +60,33 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);
+  const Tensor *quant_filter = quantilize_filter(filter);
+  // delete original filter?
+  filter = quant_filter;
+  auto filter_ptr = filter->data<float>();
  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.address = reinterpret_cast<void *> input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.address = reinterpret_cast<void *> out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
 }

--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef CONV_OP
+#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+namespace paddle_mobile {
+namespace operators {
+template <>  // Specialization for <FPGA, float>.
+bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
+  return true;  // Stub: does no setup and never reads `param`.
+}
+template <>  // Specialization for <FPGA, float>.
+void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
+  // Disabled, so this body does nothing: ConvCompute<float>(param);
+}
+template class ConvKernel<FPGA, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -160,4 +160,12 @@ else ()
    #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
+endif()
+if(FPGA)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
 endif()
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -20,7 +20,7 @@ int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  bool optimize = false;
  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time2 = time();
+    auto time1 = time();
    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};