Commit 0720653b authored by TianXiaogang, committed by Yan Chunwei

fix fpga compile problem and kernels (#1989)

Parent 019f5b8e
@@ -34,6 +34,7 @@
 .DS_Store
 build/
+build_fpga/
 .idea/
@@ -23,9 +23,7 @@ void fpga_resize(float* input,
                  uint8_t* output,
                  int output_width,
                  int output_height) {
-  paddle::zynqmp::InplaceArgs inplace_args = {
-      .relu_enable = 0, .power_enable = 0,
-  };
+  paddle::zynqmp::InplaceArgs inplace_args = {0, 0, 0};
   paddle::zynqmp::config_inplace(inplace_args);
   paddle::zynqmp::ImageInputArgs input_args = {nullptr};
@@ -39,10 +39,14 @@ static size_t memory_size_max = 0;
 static size_t memory_size = 0;
 static inline int do_ioctl(uint64_t req, const void *arg) {
+  int ret = -1;
 #ifdef PADDLE_LITE_OS_LINUX
-  return ioctl(fd, req, arg);
+  ret = ioctl(fd, req, arg);
+  if (ret != 0) {
+    throw -1;
+  }
 #else
-  return -1;
+  return ret;
 #endif
 }
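With this change, do_ioctl on Linux no longer reports failure through its return value; a nonzero result from ioctl now surfaces as a thrown int. Call sites therefore need a try/catch rather than a status check. A minimal caller sketch follows, where IOCTL_FPGA_RESET and reset_device are hypothetical names used only for illustration:

// Hypothetical caller, not part of this commit: shows how code built on the
// new do_ioctl would observe failures, since errors now propagate as a
// thrown int instead of a return code.
int reset_device() {
  try {
    do_ioctl(IOCTL_FPGA_RESET, nullptr);  // throws -1 if the ioctl fails
  } catch (int err) {
    return err;  // map the thrown value back to a status code
  }
  return 0;
}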
@@ -46,6 +46,15 @@ struct VersionArgs {
 struct DeviceInfo {
   uint32_t filter_cap;
+  uint32_t version;
+  uint16_t device_type;
+  uint32_t reserved0;
+  uint32_t reserved1;
+  uint32_t reserved2;
+  uint32_t reserved3;
+  uint32_t reserved4;
+  uint32_t reserved5;
+  uint32_t reserved6;
 };
 struct MemoryCopyArgs {
@@ -191,6 +200,7 @@ struct NormalizeParameterArgs {
 };
 struct InplaceArgs {
+  bool leaky_relu_enable;
   bool relu_enable;
   bool power_enable;
   bool normalize_enable;
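For reference, the {0, 0, 0} aggregate initializer used in fpga_resize above lines up with the updated member order of InplaceArgs, so the new leaky_relu_enable flag is covered as well. A minimal sketch of the semantics, assuming the struct has no members beyond normalize_enable:

// Sketch only: the member order follows the diff above; whether further
// members exist after normalize_enable is an assumption.
struct InplaceArgs {
  bool leaky_relu_enable;
  bool relu_enable;
  bool power_enable;
  bool normalize_enable;
};

// The first three members are initialized to 0 (false) in declaration order;
// any remaining members are value-initialized, so every flag ends up false.
InplaceArgs inplace_args = {0, 0, 0};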
@@ -57,7 +57,7 @@ class DDimLite {
   DDimLite Slice(int start, int end) const;
-  DDimLite Flattern2D(int col) const {
+  DDimLite Flatten2D(int col) const {
     return DDimLite(std::vector<value_type>(
         {Slice(0, col).production(), Slice(col, size()).production()}));
   }
@@ -118,6 +118,13 @@ class TensorLite {
   const LoD &lod() const { return lod_; }
   LoD *mutable_lod() { return &lod_; }
+  void set_lod(const LoD &lod) { lod_ = lod; }
+
+  PrecisionType precision() const { return precision_; }
+  void set_precision(PrecisionType precision) { precision_ = precision; }
+
+  bool persistable() const { return persistable_; }
+  void set_persistable(bool persistable) { persistable_ = persistable; }
 
   // T is the data type and R is the return type
   // For OpenCL, the return type can be cl::Buffer
   // and the data type can be float/int8_t.
@@ -147,6 +154,9 @@ class TensorLite {
   void CopyDataFrom(const TensorLite &other);
 
+  template <typename T>
+  TensorLite Slice(int64_t begin, int64_t end) const;
+
   TargetType target() const { return target_; }
   zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
@@ -168,6 +178,11 @@ class TensorLite {
   LoD lod_;
   size_t memory_size_{};
+  size_t offset_{0};
+
+  PrecisionType precision_{PrecisionType::kUnk};
+  bool persistable_{false};
+
   zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
 
   template <typename T>
@@ -219,6 +234,18 @@ bool TensorCompareWith(const TensorT &a, const TensorT &b) {
   if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
   return true;
 }
 
+template <typename T>
+TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
+  int64_t base = numel() / dims_[0];
+  TensorLite dst;
+  dst.buffer_ = buffer_;
+  dst.target_ = target_;
+  auto dst_dims = dims_;
+  dst_dims[0] = end - begin;
+  dst.Resize(dst_dims);
+  dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
+  return dst;
+}
+
 }  // namespace lite
 }  // namespace paddle
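The new TensorLite::Slice<T> creates a view along the first dimension: it shares the underlying buffer and only adjusts the dims and offset_, so no data is copied. A minimal usage sketch, assuming the tensor header is included; the shapes are made up for illustration:

// Sketch only: illustrates the intended semantics of the Slice<T> template
// defined above, not code from this commit.
using paddle::lite::DDimLite;
using paddle::lite::TensorLite;

TensorLite batch;
batch.Resize(DDimLite(std::vector<int64_t>({8, 3, 224, 224})));
batch.mutable_data<float>();

// Rows [2, 5) of the batch: the view shares batch's buffer, dims() becomes
// {3, 3, 224, 224}, and offset_ advances by 2 * 3 * 224 * 224 * sizeof(float).
TensorLite sub = batch.Slice<float>(2, 5);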
@@ -28,10 +28,9 @@ void ConvCompute::PrepareForRun() {
   // ====================================================
   zynqmp::ConvParam& conv_param = pe_.param();
   param.output->mutable_data<float16>();
-  filter_.setDataType(zynqmp::FP32);
+  // filter_.setDataType(zynqmp::FP32);
   conv_param.input = param.x->ZynqTensor();
   conv_param.output = param.output->ZynqTensor();
   conv_param.filter = param.filter->ZynqTensor();
@@ -40,11 +39,17 @@ void ConvCompute::PrepareForRun() {
   conv_param.paddings = param.paddings;
   conv_param.dilations = param.dilations;
   fill_scale_bias_const(&conv_param);
+  conv_param.bias()->copyFrom(param.bias->ZynqTensor());
+  conv_param.relu.enabled = param.fuse_relu;
 
   pe_.init();
   pe_.apply();
 }
 
-void ConvCompute::Run() { pe_.dispatch(); }
+void ConvCompute::Run() {
+  auto& param = this->Param<param_t>();
+  zynqmp::ConvParam& conv_param = pe_.param();
+  pe_.dispatch();
+}
 
 }  // namespace fpga
 }  // namespace kernels
@@ -37,9 +37,6 @@ class ConvCompute
  private:
   zynqmp::ConvPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
-  zynqmp::Tensor filter_;
 };
 
 }  // namespace fpga
@@ -36,9 +36,6 @@ class ElementwiseAddCompute
  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };
 
 class ElementwiseAddActivationCompute
@@ -51,9 +48,6 @@ class ElementwiseAddActivationCompute
  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };
 
 }  // namespace fpga
@@ -35,9 +35,6 @@ void PoolCompute::PrepareForRun() {
   pool_param.output = param.output->ZynqTensor();
   pool_param.relu.enabled = false;
-  auto& in_dims = param.x->dims();
-  auto& out_dims = param.output->dims();
   pool_param.type = param.pooling_type == "max" ? zynqmp::PoolingType::MAX
                                                 : zynqmp::PoolingType::AVERAGE;
   pool_param.globalPooling = param.global_pooling;
@@ -36,8 +36,6 @@ class PoolCompute
  private:
   zynqmp::PoolingPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
 };
 
 }  // namespace fpga
@@ -22,7 +22,7 @@ namespace fpga {
 using float16 = zynqmp::float16;
 
-void SoftmaxCompute::Run() {
+void SoftmaxCompute::PrepareForRun() {
   zynqmp::SoftmaxParam& softmax_param = pe_.param();
   auto& param = Param<operators::SoftmaxParam>();
@@ -33,6 +33,8 @@ void SoftmaxCompute::Run() {
   pe_.apply();
 }
 
+void SoftmaxCompute::Run() { pe_.dispatch(); }
+
 }  // namespace fpga
 }  // namespace kernels
 }  // namespace lite
@@ -29,6 +29,7 @@ using float16 = zynqmp::float16;
 class SoftmaxCompute
     : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
  public:
+  void PrepareForRun() override;
   void Run() override;
 
   virtual ~SoftmaxCompute() = default;
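Splitting the softmax kernel into PrepareForRun() and Run() matches the pattern used by the other FPGA kernels in this commit: parameter binding, pe_.init(), and pe_.apply() happen once, and each inference only calls pe_.dispatch(). A rough sketch of the resulting lifecycle, where run_inference is a hypothetical driver loop written only to illustrate the calling order:

// Hypothetical driver loop, for illustration only: the framework invokes
// PrepareForRun() once and then Run() for every inference.
void run_inference(paddle::lite::kernels::fpga::SoftmaxCompute& kernel,
                   int iterations) {
  kernel.PrepareForRun();  // one-time: bind params, pe_.init(), pe_.apply()
  for (int i = 0; i < iterations; ++i) {
    kernel.Run();          // per-inference: pe_.dispatch() only
  }
}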