Merge pull request #675 from chonwhite/develop

#674 tensor增加fpga量化支持

Merge pull request #675 from chonwhite/develop
#674 tensor增加fpga量化支持
a74ccea0 · zhangyang0701 · GitHub · f4eb343d · e38aa828 · a74ccea0
Showing 3 changed files with 90 additions and 54 deletions

src/fpga/api/fpga_api.h src/fpga/api/fpga_api.h +73 -53

src/framework/tensor.h src/framework/tensor.h +16 -0

src/operators/kernel/fpga/concat_kernel.cpp src/operators/kernel/fpga/concat_kernel.cpp +1 -1

未找到文件。
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -31,90 +31,110 @@ void* fpga_malloc(size_t size);
 void fpga_free(void* ptr);
 void fpga_copy(void* dst, const void* src, size_t num);
-struct FpgaVersionArgs {
+struct VersionArgs {
-  void* buf;
+  void* buffer;
-};
-struct MemoryToPhysicalArgs {
-  const void* src;
-  uint64_t physical;
 };
 struct MemoryCopyArgs {
  void* src;
-  void* dst;
+  void* dest;
  size_t size;
 };
-struct FpgaQuantArgs {
+struct BNArgs {
-  float scale;
+  bool enabled;
-};
+  void* bias_address;
+  void* scale_address;
-struct FpgaBNArgs {
-  bool enabled = false;
-  void* bias_addr;
-  void* scale_addr;
 };
-struct FpgaKernelArgs {
+/**
+Conv and Pooling kernel
+*/
+struct KernelArgs {
  uint32_t width;
  uint32_t height;
-  uint32_t stride_h;
  uint32_t stride_w;
+  uint32_t stride_h;
 };
-struct FpgaImageArgs {
+struct ImageInputArgs {
-  uint32_t width;
+  void* address;  // input featuremap virtual address
-  uint32_t height;
  uint32_t channels;
-  uint32_t pad_h;
+  uint32_t width;  // featuremap width
-  uint32_t pad_w;
+  uint32_t height;
+  uint32_t pad_width;  // padding width;
+  uint32_t pad_height;
+};
+struct ImageOutputArgs {
+  void* address;         // output result address;
+  float* scale_address;  // output scale address;
 };
-struct FpgaConvArgs {
+struct ConvArgs {
  bool relu_enabled;
-  struct FpgaBNArgs BNargs;
+  float scale;  // input scale;
-  void* image_addr;
+  void* bias_address;
-  void* filter_addr;
+  void* filter_address;
-  void* bias_addr;
-  void* output_addr;
-  float quant_scale;
-  struct FpgaImageArgs image;
  uint32_t filter_num;
  uint32_t group_num;
-  struct FpgaKernelArgs kernel;
+  struct BNArgs bn;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
+  struct KernelArgs kernel;
 };
-struct FpgaPoolArgs {
+struct PoolingArgs {
-  void* image_addr;
+  float scale;
-  void* output_addr;
+  struct ImageInputArgs image;  // input image;
-  struct FpgaImageArgs image;
+  struct ImageOutputArgs output;
-  struct FpgaKernelArgs kernel;
+  struct KernelArgs kernel;
 };
-struct FpgaEWAddArgs {
+// elementwise add arguments
+struct EWAddArgs {
  bool relu_enabled;
-  void* image0_addr;
+  float scale;
-  void* image1_addr;
-  void* result_addr;
+  float const0;  // output0 = const0 x input0 + const1 x input1;
-  uint32_t const0;
+  float const1;
-  uint32_t const1;
+  struct ImageInputArgs image0;
-  uint32_t data_len;  // aligned element count
+  struct ImageInputArgs image1;
+  struct ImageOutputArgs output;
 };
+struct FpgaRegWriteArgs {
+  uint64_t address;  //
+  uint64_t value;
+};
+struct FpgaRegReadArgs {
+  uint64_t address;
+  uint64_t value;
+};
+#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
+#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
+#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
+#define IOCTL_SEPARATOR_0 10
+#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
+#define IOCTL_SEPARATOR_1 20
+#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
+#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
+#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+//============================== API =============================
 int ComputeFpgaConv(struct FpgaConvArgs args);
 int ComputeFpgaPool(struct FpgaPoolArgs args);
 int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
-#define IOCTL_FPGA_MAGIC 'CNN'
-#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
-#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
-#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
-#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
-#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
-#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -253,6 +253,18 @@ class Tensor {
                          "Tensor's dims_ is out of bound. ");
  }
+#ifdef PADDLE_MOBILE_FPGA
+  struct FPGAArgs {
+    float scale;
+    inline float *scale_pointer() { return &scale; }
+  };
+  struct &fpga_args() const {
+    return fpgaArgs_;
+  }
+#endif
 private:
  /**
   * @note    Placeholder hides type T, so it doesn't appear as a
@@ -319,6 +331,10 @@ class Tensor {
   * begins.
   */
  size_t offset_;
+#ifdef PADDLE_MOBILE_FPGA
+  FPGAArgs fpgaArgs_;
+#endif
 };
 #ifdef PADDLE_MOBILE_DEBUG

--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
  for (int i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
-    auto channels = input[3];
+    auto channels = input->dims()[3];
    out_offset += channels;
    auto src = input->data<half>();
    for (int j = 0; j < pixels; ++j) {