提交 ccfc0ab6 编写于 作者: Z zhaojiaying01

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into develop

......@@ -46,8 +46,8 @@ static Dtype find_max(Dtype* data, int num) {
return max;
}
template <typename Dtype>
framework::Tensor* quantilize_filter(framework::Tensor* filter) {
// template <typename Dtype>
framework::Tensor* quantify_filter(framework::Tensor* filter) {
float scale = 0;
float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
......@@ -57,7 +57,7 @@ framework::Tensor* quantilize_filter(framework::Tensor* filter) {
const int width = filter->dims()[3];
int8_t* int_data = nullptr;
int8_t* tmp_data = new int[filter->numel()];
int8_t* tmp_data = new int8_t[filter->numel()];
// 32bit filter -> 8bit filter;
if (filter->type() == typeid(float)) {
......
......@@ -24,7 +24,7 @@ template <typename Dtype>
static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
int height, int width);
template <typename Dtype>
framework::Tensor* quantilize_filter(framework::Tensor* filter);
// template <typename Dtype>
framework::Tensor* quantify_filter(framework::Tensor* filter);
} // namespace fpga
} // namespace paddle_mobile
......@@ -28,7 +28,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>();
......@@ -60,7 +60,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
const Tensor *quant_filter = quantilize_filter(filter);
Tensor *quant_filter = fpga::quantify_filter(filter);
// delete original filter?
filter = quant_filter;
......@@ -68,22 +68,22 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = reinterpret_cast<void *> input_ptr;
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = reinterpret_cast<void *> out_ptr;
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
......
......@@ -920,7 +920,11 @@ class FusionFcParam : public OpParam {
}
const Tensor *InputX() const { return input_x_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *InputY() const { return input_y_; }
#else
const Tensor *InputY() const { return input_y_; }
#endif
const Tensor *InputZ() const { return input_z_; }
......@@ -976,7 +980,11 @@ class FusionConvAddParam : public OpParam {
const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_; }
......@@ -1050,7 +1058,11 @@ class FusionConvAddBNReluParam : public OpParam {
const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_; }
......@@ -1144,8 +1156,11 @@ class FusionConvAddBNParam : public OpParam {
const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_y_; }
const vector<int> &Strides() const { return strides_; }
......
......@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
# gen test
ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile)
elseif ("yolo" IN_LIST NET)
# gen test
ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
......@@ -138,6 +143,10 @@ else ()
ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenetssd paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile)
# gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile)
......
......@@ -44,5 +44,8 @@ int main() {
<< std::endl;
}
std::cout
<< "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
<< std::endl;
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
// Benchmark driver for the combined-params MobileNet model: loads the model,
// runs one warm-up inference, then reports the average latency of 10 runs.
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
                         std::string(g_mobilenet_combined) + "/params", true)) {
    auto time2 = time();
    // Fix: previously computed time_diff(time1, time1), which always
    // reported a load cost of 0 ms; measure against time2 instead.
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;

    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);

    // Warm-up run (excluded from timing) so caches/thread pools are primed.
    auto vec_result = paddle_mobile.Predict(input, dims);
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;

    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout
      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
      << std::endl;
  return 0;
}
......@@ -27,6 +27,7 @@ limitations under the License. */
static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_mobilenet_combined = "../models/mobilenet_combine";
static const char *g_squeezenet = "../models/squeezenet";
static const char *g_googlenet = "../models/googlenet";
static const char *g_mobilenet = "../models/mobilenet";
......
......@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
set(ELEMENTWISEADD_OP ON)
set(RELU_OP ON)
set(SOFTMAX_OP ON)
set(SOFTMAX_OP ON)
set(MUL_OP ON)
set(DEPTHWISECONV_OP ON)
set(BATCHNORM_OP ON)
set(POOL_OP ON)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册