Merge pull request #947 from chonwhite/develop

fix:#946

Merge pull request #947 from chonwhite/develop
fix:#946
6859850c · Chon · GitHub · 62e08a52 · ccaae49f · 6859850c
4 changed files
--- a/src/fpga/api.cpp
+++ b/src/fpga/api.cpp
@@ -145,8 +145,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 }
 int PerformBypass(const struct BypassArgs &args) {
 #ifdef FPGA_TEST_MODE
-  DLOG << "   layout_type:" << args.layout_type
+  DLOG << "   input_type:" << args.input_data_type
-       << "   convert_type:" << args.convert_type;
+       << "   input_layout_type:" << args.input_layout_type;
  DLOG << "   image_address:" << args.image.address
       << "   image_scale_address:" << args.image.scale_address
       << "   image_channels:" << args.image.channels

--- a/src/fpga/api.h
+++ b/src/fpga/api.h
@@ -25,23 +25,14 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
-int open_device();
+enum DataType {
-int close_device();
+  DATA_TYPE_FP32 = 1,
+  DATA_TYPE_FP16 = 0,
-void* fpga_malloc(size_t size);
-void fpga_free(void* ptr);
-void fpga_copy(void* dst, const void* src, size_t num);
-enum DataConvertType {
-  DATA_NO_CONVERT = 0,
-  DATA_FP32_TO_FP16 = 1,
-  DATA_FP16_TO_FP32 = 2,
 };
-enum LayoutConvertType {
+enum LayoutType {
-  LAYOUT_NO_CONVERT = 0,
+  LAYOUT_CHW = 1,
-  LAYOUT_CHW_TO_HWC = 1,
+  LAYOUT_HWC = 0,
-  LAYOUT_HWC_TO_CHW = 2,
 };
 struct VersionArgs {
@@ -83,7 +74,6 @@ struct ConvArgs {
  bool relu_enabled;
  void* sb_address;  // scale and bias are interlaced;
  void* filter_address;
-  float* filter_scale_address;
  uint32_t filter_num;
  uint32_t group_num;
@@ -122,16 +112,18 @@ struct PoolingArgs {
 struct EWAddArgs {
  bool relu_enabled;
-  float const0;  // output0 = const0 x input0 + const1 x input1;
+  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
-  float const1;
+  uint32_t const1;
  struct ImageInputArgs image0;
  struct ImageInputArgs image1;
  struct ImageOutputArgs output;
 };
 struct BypassArgs {
-  enum DataConvertType convert_type;
+  enum DataType input_data_type;
-  enum LayoutConvertType layout_type;
+  enum DataType output_data_type;
+  enum LayoutType input_layout_type;
+  enum LayoutType output_layout_type;
  struct ImageInputArgs image;
  struct ImageOutputArgs output;
 };
@@ -141,6 +133,11 @@ struct FpgaRegWriteArgs {
  uint64_t value;
 };
+struct FpgaRegReadArgs {
+  uint64_t address;
+  uint64_t value;
+};
 #define IOCTL_FPGA_MAGIC 'FPGA'
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
@@ -184,6 +181,13 @@ enum FPGA_ERR_TYPE {
 //============================== API =============================
+int open_device();
+int close_device();
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+void fpga_copy(void* dst, const void* src, size_t num);
 int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct WrapperConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);

--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -56,8 +56,11 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
    auto output_ptr = output->mutable_data<half>();
    fpga::BypassArgs args;
-    args.convert_type = fpga::DATA_FP32_TO_FP16;
-    args.layout_type = fpga::LAYOUT_NO_CONVERT;
+    args.input_data_type = fpga::DATA_TYPE_FP32;
+    args.output_data_type = fpga::DATA_TYPE_FP16;
+    args.input_layout_type = fpga::LAYOUT_CHW;
+    args.output_layout_type = fpga::LAYOUT_HWC;
    args.image.address = (void *)input_ptr;
    args.image.channels = input->dims()[1];
    args.image.height = input->dims()[2];

--- a/src/operators/kernel/fpga/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
@@ -25,27 +25,34 @@ namespace operators {
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
  const Tensor *input = param->InputX();
+  if (input->type() == typeid(half)) {
-  auto input_ptr = input->data<float>();
+    auto input_ptr = input->data<half>();
-  auto output = param->Out();
+    auto output_ptr = param->Out();
-  auto output_ptr = output->mutable_data<float>();
+    fpga::BypassArgs args;
-  fpga::BypassArgs args;
+    args.input_layout_type = fpga::LAYOUT_HWC;
-  args.convert_type = fpga::DATA_FP16_TO_FP32;
+    args.output_layout_type = fpga::LAYOUT_CHW;
-  args.layout_type = fpga::LAYOUT_NO_CONVERT;
+    args.input_data_type = fpga::DATA_TYPE_FP16;
-  args.image.address = (void *)(input_ptr);
+    args.output_data_type = fpga::DATA_TYPE_FP32;
-  args.image.height = (uint32_t)input->dims()[0];
+    args.image.address = (void *)(input_ptr);
-  args.image.width = (uint32_t)input->dims()[1];
+    args.image.height = (uint32_t)input->dims()[0];
-  args.image.channels = 1;
+    args.image.width = (uint32_t)input->dims()[1];
-  args.output.address = output_ptr;
+    args.image.channels = 1;
-  param->SetFpgaArgs(args);
+    args.output.address = output_ptr;
+    param->SetFpgaArgs(args);
+  }
  return true;
 }
 template <>
 void SoftmaxKernel<FPGA, float>::Compute(
    const SoftmaxParam<FPGA> &param) const {
-  // SoftmaxCompute<float>(param);
+  DLOG << "======================================= FPGA SoftMAX "
+          "===============================================";
+  const Tensor *in_x = param.InputX();
+  Tensor *out = param.Out();
+  auto x_dims = in_x->dims();
+  out->Resize(x_dims);
+  math::SoftmaxFuntor<CPU, float>()(in_x, out);
 }
 template class SoftmaxKernel<FPGA, float>;