From 686b2c9388502f5a4d34fb5ee6e431054b5c70e8 Mon Sep 17 00:00:00 2001
From: hanbuhe <hanbuhe@baidu.com>
Date: Fri, 3 Aug 2018 19:32:38 +0800
Subject: [PATCH] FPGA conv added filter scale

---
 src/fpga/api/fpga_api.cpp      | 13 +++++---
 src/fpga/api/fpga_api.h        | 12 +++----
 src/fpga/fpga_quantilization.h | 57 +++++++++++++---------------------
 src/framework/tensor.h         |  5 ++-
 src/io/executor.cpp            |  9 ------
 5 files changed, 39 insertions(+), 57 deletions(-)
diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp
index f91c21beb2..779c846d1f 100644
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -35,7 +35,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";
 
-static inline int do_ioctl(int req, void *arg) {
+static inline int do_ioctl(int req, const void *arg) {
   return ioctl(req, (unsigned int64_t)arg);
 }
 
@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
 
-int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); }
+int ComputeFpgaConv(const struct ConvArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_CONV, &args);
+}
 int ComputeFpgaPool(const struct PoolingArgs &args) {
-  return do_ioctl(22, &args);
+  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
 }
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
-  return do_ioctl(23, &args);
+  return do_ioctl(IOCTL_CONFIG_EW, &args);
+}
+int PerformBypass(const struct BypassArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
 }
 
 }  // namespace fpga
diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h
index 08635cdb5c..0823e19a7f 100644
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -86,12 +86,12 @@ struct ImageOutputArgs {
 
 struct ConvArgs {
   bool relu_enabled;
-  void* bias_address;
+  void* sb_address;  // scale and bias are interlaced;
   void* filter_address;
+  float* filter_scale_address;
   uint32_t filter_num;
   uint32_t group_num;
 
-  void* sb_address;  // scale and bias are interlaced;
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;
   struct ImageOutputArgs output;
@@ -116,6 +116,7 @@ struct EWAddArgs {
 
 struct BypassArgs {
   enum DataConvertType convert_type;
+  enum LayoutConvertType layout_type;
   struct ImageInputArgs image;
   struct ImageOutputArgs output;
 };
@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
   uint64_t value;
 };
 
-struct FpgaRegReadArgs {
-  uint64_t address;
-  uint64_t value;
-};
-
 #define IOCTL_FPGA_MAGIC 'FPGA'
 
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
 #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
 #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
 
@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
 
 //============================== API =============================
 
+int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct ConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index d2d2d61835..7a1df04732 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 
-#include <string>
 #include "common/types.h"
 #include "framework/lod_tensor.h"
-#include "framework/operator.h"
-#include "framework/scope.h"
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
 
-bool is_conv(std::string type) {
-  if (type.compare(G_OP_TYPE_CONV) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) {
-    return true;
-  }
-  return false;
-}
-
 template <typename Dtype>
-void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
-                   std::shared_ptr<framework::Scope> scope) {
-  if (!is_conv(op.get()->Type())) {
-    return;
-  }
-  framework::Tensor* filter = nullptr;
-  auto var_vec = op.get()->Inputs().at("Filter");
-  if (!var_vec.empty()) {
-    auto var = scope.get()->FindVar(var_vec[0]);
-    filter = var->template GetMutable<framework::LoDTensor>();
-  }
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   float scale = 0;
-
   // 32bit filter -> 8bit filter;
+  float min = 0f;
+  float max = 0f;
   if (filter->type() == typeid(float)) {
+    float* floatData = originalFilter->data<float>();
+    for (int i = 0; i < filter->numel(); ++i) {
+      min = std::min(min, floatData[i]);
+      max = std::max(max, floatData[i]);
+    }
+
+    float fix_range = (float)((1 << (8 - 1)) - 1);
+    float float_range = max;
+    scale = (float_range / fix_range);
+
     framework::Tensor* originalFilter = filter;
     framework::Tensor* quantFilter = new framework::Tensor();
-    float* floatData = originalFilter->data<float>();
     int8_t* intData = quantFilter->mutable_data<int8_t>();
-  }
+    for (int i = 0; i < filter->numel(); ++i) {
+      intData[i] = (int8_t)floatData[i] * scale;
+    }
+    quantFilter.scale = scale;
+    // NCHW -> NHWC;
+    return quantFilter;
+  }
+  return filter;
 }
 
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 797fcf5bff..8bd6b56e23 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -257,7 +257,10 @@ class Tensor {
   struct FPGAArgs {
     float scale;
 
-    inline float *scale_pointer() { return &scale; }
+    inline const float *scale_pointer() { 
+      return &scale; 
+    }
+
   };
 
   struct FPGAArgs fpga_args() const {
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index c09fe2c585..d6434b64aa 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,10 +32,6 @@ limitations under the License. */
 #include "common/threadpool.h"
 #endif
 
-#ifdef PADDLE_MOBILE_FPGA
-#include "fpga/fpga_quantilization.h"
-#endif
-
 namespace paddle_mobile {
 using framework::Variable;
 
@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (const auto &op : ops) {
     op->Init();
   }
-#ifdef PADDLE_MOBILE_FPGA
-  for (const auto &op : ops) {
-    quantilize_op(op, program_.scope);
-  }
-#endif
 }
 
 template <typename Dtype, Precision P>
-- 
GitLab