FPGA conv: added filter scale

686b2c93 · hanbuhe · 1b97e2fa · 686b2c93 · 686b2c93 · 686b2c93
5 changed files
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -35,7 +35,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";

-static inline int do_ioctl(int req, void *arg) {
+static inline int do_ioctl(int req, const void *arg) {
  return ioctl(req, (unsigned int64_t)arg);
 }

@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
 }

-int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); }
+int ComputeFpgaConv(const struct ConvArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_CONV, &args);
+}
 int ComputeFpgaPool(const struct PoolingArgs &args) {
-  return do_ioctl(22, &args);
+  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
 }
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
-  return do_ioctl(23, &args);
+  return do_ioctl(IOCTL_CONFIG_EW, &args);
+}
+int PerformBypass(const struct BypassArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
 }

 }  // namespace fpga

--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -86,12 +86,12 @@ struct ImageOutputArgs {

 struct ConvArgs {
  bool relu_enabled;
-  void* bias_address;
+  void* sb_address;  // scale and bias are interlaced;
  void* filter_address;
+  float* filter_scale_address;
  uint32_t filter_num;
  uint32_t group_num;

-  void* sb_address;  // scale and bias are interlaced;
  struct KernelArgs kernel;
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
@@ -116,6 +116,7 @@ struct EWAddArgs {

 struct BypassArgs {
  enum DataConvertType convert_type;
+  enum LayoutConvertType layout_type;
  struct ImageInputArgs image;
  struct ImageOutputArgs output;
 };
@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
  uint64_t value;
 };

-struct FpgaRegReadArgs {
-  uint64_t address;
-  uint64_t value;
-};
-
 #define IOCTL_FPGA_MAGIC 'FPGA'

 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
 #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
 #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)

@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {

 //============================== API =============================

+int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct ConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);

--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once

-#include <string>
 #include "common/types.h"
 #include "framework/lod_tensor.h"
-#include "framework/operator.h"
-#include "framework/scope.h"
 #include "framework/tensor.h"

 namespace paddle_mobile {

-bool is_conv(std::string type) {
-  if (type.compare(G_OP_TYPE_CONV) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) {
-    return true;
-  }
-  return false;
-}
-
 template <typename Dtype>
-void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
-                   std::shared_ptr<framework::Scope> scope) {
-  if (!is_conv(op.get()->Type())) {
-    return;
-  }
-  framework::Tensor* filter = nullptr;
-  auto var_vec = op.get()->Inputs().at("Filter");
-  if (!var_vec.empty()) {
-    auto var = scope.get()->FindVar(var_vec[0]);
-    filter = var->template GetMutable<framework::LoDTensor>();
-  }
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
  float scale = 0;
-
  // 32bit filter -> 8bit filter;
+  float min = 0f;
+  float max = 0f;
  if (filter->type() == typeid(float)) {
+    float* floatData = originalFilter->data<float>();
+    for (int i = 0; i < filter->numel(); ++i) {
+      min = std::min(min, floatData[i]);
+      max = std::max(max, floatData[i]);
+    }
+
+    float fix_range = (float)((1 << (8 - 1)) - 1);
+    float float_range = max;
+    scale = (float_range / fix_range);
+
    framework::Tensor* originalFilter = filter;
    framework::Tensor* quantFilter = new framework::Tensor();
-    float* floatData = originalFilter->data<float>();
    int8_t* intData = quantFilter->mutable_data<int8_t>();
+    for (int i = 0; i < filter->numel(); ++i) {
+      intData[i] = (int8_t)floatData[i] * scale;
+    }
+    quantFilter.scale = scale;
+    // NCHW -> NHWC;
+    return quantFilter;
  }
+  return filter;
 }

 }  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -257,7 +257,10 @@ class Tensor {
  struct FPGAArgs {
    float scale;

-    inline float *scale_pointer() { return &scale; }
+    inline const float *scale_pointer() { 
+      return &scale; 
+    }
+
  };

  struct FPGAArgs fpga_args() const {

--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,10 +32,6 @@ limitations under the License. */
 #include "common/threadpool.h"
 #endif

-#ifdef PADDLE_MOBILE_FPGA
-#include "fpga/fpga_quantilization.h"
-#endif
-
 namespace paddle_mobile {
 using framework::Variable;

@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  for (const auto &op : ops) {
    op->Init();
  }
-#ifdef PADDLE_MOBILE_FPGA
-  for (const auto &op : ops) {
-    quantilize_op(op, program_.scope);
-  }
-#endif
 }

 template <typename Dtype, Precision P>