Merge branch 'develop' into develop

6523dec9 · Ruilong Liu · GitHub · b2f1d723 · bd9c7c39 · 6523dec9
6 changed files
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -36,7 +36,7 @@ static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";

 static inline int do_ioctl(int req, void *arg) {
-  return ioctl(req, (long unsigned int)arg);
+  // Sends request `req` with payload `arg` to the device referred to by the
+  // file-level descriptor `fd` and returns ioctl's result unchanged.
+  // ioctl(2) expects the descriptor as its first argument; the payload
+  // pointer is passed through as-is, so no integer cast is needed.
+  return ioctl(fd, req, arg);
 }

 int open_device() {
@@ -58,9 +58,13 @@ void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
 }

-int ComputeFpgaConv(struct ConvArgs args) {}
-int ComputeFpgaPool(struct PoolingArgs args) {}
-int ComputeFpgaEWAdd(struct EWAddArgs args) {}
+// Each Compute* entry point hands its argument struct to the driver through
+// do_ioctl() and returns that call's result unchanged.
+//
+// The request code is the encoded IOCTL_CONFIG_* value declared in
+// fpga_api.h for the matching struct, rather than the bare command number.
+// do_ioctl() takes a non-const pointer, so constness is cast away at the
+// call boundary only.
+// NOTE(review): the IOCTL_CONFIG_* codes are declared _IOW, so the driver is
+// assumed to only read these structs - confirm against the driver.
+int ComputeFpgaConv(const struct ConvArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_CONV, const_cast<struct ConvArgs *>(&args));
+}
+int ComputeFpgaPool(const struct PoolingArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_POOLING,
+                  const_cast<struct PoolingArgs *>(&args));
+}
+int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_EW, const_cast<struct EWAddArgs *>(&args));
+}

 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -31,6 +31,18 @@ void* fpga_malloc(size_t size);
 void fpga_free(void* ptr);
 void fpga_copy(void* dst, const void* src, size_t num);

+enum DataConvertType {  // type of BypassArgs::convert_type
+  DATA_NO_CONVERT = 0,
+  DATA_FP32_TO_FP16 = 1,  // presumably float32 -> float16; confirm with driver
+  DATA_FP16_TO_FP32 = 2,  // presumably float16 -> float32; confirm with driver
+};
+
+enum LayoutConvertType {  // NOTE(review): no user of this enum is visible here
+  LAYOUT_NO_CONVERT = 0,
+  LAYOUT_CHW_TO_HWC = 1,  // names suggest a CHW <-> HWC reorder; TODO confirm
+  LAYOUT_HWC_TO_CHW = 2,
+};
+
 struct VersionArgs {  // payload type named by the IOCTL_VERSION request code
  void* buffer;  // NOTE(review): size/ownership of this buffer not shown here
 };
@@ -79,7 +91,7 @@ struct ConvArgs {
  uint32_t filter_num;
  uint32_t group_num;

-  struct BNArgs bn;
+  void* sb_address;  // scale and bias are interlaced;
  struct KernelArgs kernel;
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
@@ -102,6 +114,12 @@ struct EWAddArgs {
  struct ImageOutputArgs output;
 };

+struct BypassArgs {  // NOTE(review): no ioctl code for this struct is visible
+  enum DataConvertType convert_type;  // one of the DATA_* selectors
+  struct ImageInputArgs image;  // presumably the source image; TODO confirm
+  struct ImageOutputArgs output;
+};
+
 struct FpgaRegWriteArgs {
  uint64_t address;  //
  uint64_t value;
@@ -115,8 +133,6 @@ struct FpgaRegReadArgs {
 #define IOCTL_FPGA_MAGIC 'FPGA'  // NOTE(review): multi-character constant; its value is implementation-defined

 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)  // command number 01 (octal literal)
-#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
-#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)

 #define IOCTL_SEPARATOR_0 10

@@ -127,6 +143,8 @@ struct FpgaRegReadArgs {
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)  // payload: struct ConvArgs
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)  // payload: struct PoolingArgs
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)  // payload: struct EWAddArgs
+#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)  // NOTE(review): a read declared with _IOW; confirm direction with the driver
+#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)  // payload: struct FpgaRegWriteArgs

 enum FPGA_ERR_TYPE {
  ERR_IOCTL_CMD = -1,
@@ -154,9 +172,9 @@ enum FPGA_ERR_TYPE {

 //============================== API =============================

-int ComputeFpgaConv(struct ConvArgs args);
-int ComputeFpgaPool(struct PoolingArgs args);
-int ComputeFpgaEWAdd(struct EWAddArgs args);
+int ComputeFpgaConv(const struct ConvArgs& args);  // forwards args to the driver via do_ioctl; returns its result
+int ComputeFpgaPool(const struct PoolingArgs& args);  // forwards args to the driver via do_ioctl; returns its result
+int ComputeFpgaEWAdd(const struct EWAddArgs& args);  // forwards args to the driver via do_ioctl; returns its result

 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include <string>
+#include "common/types.h"
+#include "framework/lod_tensor.h"
+#include "framework/operator.h"
+#include "framework/scope.h"
+#include "framework/tensor.h"
+
+namespace paddle_mobile {
+
+// Returns true when `type` names one of the convolution-family operators
+// (plain conv or one of the fused conv variants listed below), false
+// otherwise.
+//
+// Declared `inline` because this definition lives in a header: without it,
+// every translation unit that includes the header emits its own external
+// definition and the link fails with duplicate symbols.
+inline bool is_conv(const std::string& type) {
+  return type == G_OP_TYPE_CONV ||
+         type == G_OP_TYPE_FUSION_CONV_ADD ||
+         type == G_OP_TYPE_FUSION_CONV_ADD_RELU ||
+         type == G_OP_TYPE_FUSION_CONV_BN_RELU ||
+         type == G_OP_TYPE_FUSION_CONV_ADD_BN;
+}
+
+// Quantization hook for convolution-family operators (work in progress).
+//
+// Looks up the first variable listed under the operator's "Filter" input in
+// `scope` and, when that tensor holds float data, prepares an int8 buffer
+// for it. The actual float -> int8 conversion is not implemented yet, so the
+// prepared buffer is discarded when the function returns.
+//
+// op:    operator to inspect; operators for which is_conv() is false are
+//        ignored.
+// scope: scope in which the filter variable is looked up.
+//
+// Returns without doing anything when the operator has no filter variable or
+// the variable cannot be resolved, instead of dereferencing a null pointer.
+template <typename Dtype>
+void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
+                   std::shared_ptr<framework::Scope> scope) {
+  if (!is_conv(op->Type())) {
+    return;
+  }
+  auto var_vec = op->Inputs().at("Filter");
+  if (var_vec.empty()) {
+    return;
+  }
+  auto var = scope->FindVar(var_vec[0]);
+  if (var == nullptr) {
+    return;
+  }
+  framework::Tensor* filter = var->template GetMutable<framework::LoDTensor>();
+  if (filter == nullptr) {
+    return;
+  }
+
+  // 32bit filter -> 8bit filter;
+  if (filter->type() == typeid(float)) {
+    // Automatic storage: the scratch tensor is released on scope exit rather
+    // than leaked through a raw `new`.
+    framework::Tensor quantFilter;
+    float* floatData = filter->data<float>();
+    int8_t* intData = quantFilter.mutable_data<int8_t>();
+    // TODO: convert floatData into intData and publish the quantized filter.
+    (void)floatData;
+    (void)intData;
+  }
+}
+
+}  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -260,7 +260,7 @@ class Tensor {
    inline float *scale_pointer() { return &scale; }
  };

-  struct &fpga_args() const {
+  struct FPGAArgs &fpga_args() {  // non-const: returns a mutable reference to fpgaArgs_
    return fpgaArgs_;
  }
 #endif

--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,6 +32,10 @@ limitations under the License. */
 #include "common/threadpool.h"
 #endif

+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/fpga_quantilization.h"
+#endif
+
 namespace paddle_mobile {
 using framework::Variable;

@@ -96,6 +100,11 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  for (const auto &op : ops) {
    op->Init();
  }
+#ifdef PADDLE_MOBILE_FPGA
+  for (const auto &op : ops) {
+    quantilize_op(op, program_.scope);
+  }
+#endif
 }

 template <typename Dtype, Precision P>

--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+// Loads the GoogLeNet model, runs one prediction on a 1x3x224x224 input and
+// logs how long the load and the prediction took. Always returns 0; nothing
+// is logged when the model fails to load.
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  bool optimize = false;
+  // Take the start timestamp before Load() so the logged load cost covers
+  // the call itself.
+  auto time1 = time();
+  if (paddle_mobile.Load(g_googlenet, optimize)) {
+    auto time2 = time();
+    DLOG << "load cost: " << time_diff(time1, time2) << "ms";
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+    auto time3 = time();
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    auto time4 = time();
+    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
+  return 0;
+}