Commit 7d8589c8 authored by jameswu2014, committed by qnqinan

marker1-anchor update (#1652)

Parent 111ca78b
......@@ -97,7 +97,7 @@ float fp16_2_fp32(int16_t fp16_num) {
} else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024;
} else {
} else { // se_fp16 == 63
e_fp32 = 0xC7800000;
offset = 1024;
}
......
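Note on the hunk above: the branch keys on se_fp16, which appears to be the sign bit and the 5 exponent bits packed into the top 6 bits of the half-precision word, so se_fp16 < 32 means the sign bit is clear, 32..62 is a negative value with a non-maximal exponent, and 63 is the negative all-ones-exponent case that the new comment labels. A minimal sketch of that decomposition, assuming an IEEE-754-style 1-5-10 half-precision layout (illustrative only, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative sketch, not part of the patch: split a half-precision word into
// the fields the branch above discriminates on, assuming a 1-bit sign /
// 5-bit exponent / 10-bit mantissa layout.
int main() {
  uint16_t bits = 0xFC00;        // example pattern: sign = 1, exponent = 31
  int se_fp16 = bits >> 10;      // top 6 bits: sign + exponent -> 63
  int mantissa = bits & 0x3FF;   // low 10 bits -> 0
  std::printf("se_fp16=%d mantissa=%d\n", se_fp16, mantissa);
  return 0;
}
```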
......@@ -34,8 +34,10 @@ limitations under the License. */
#define BS_NUM_ALIGNMENT (8)
#define BIAS_SCALE_DMA_NUM (4)
#define RESULT_ALIGNMENT (32)
#define PE_COLUMN (8)
#define ROW_PARALLEL_NUM (2)
#define BIAS_NUM_ALIGNMENT (16)
#endif
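The new constants extend the existing alignment/parallelism knobs; BIAS_NUM_ALIGNMENT in particular reads like the other *_ALIGNMENT macros, i.e. a multiple to which a count is rounded up before the data is handed to the FPGA. A hypothetical illustration of that rounding (the helper and the numbers below are assumptions, not code from the patch):

```cpp
#include <cassert>
#include <cstdint>

#define BIAS_NUM_ALIGNMENT (16)

// Round num up to the next multiple of x (same formula as align_to_x further down).
static inline int64_t round_up_to(int64_t num, int64_t x) {
  return (num + x - 1) / x * x;
}

int main() {
  // e.g. 100 bias/scale entries would be padded to 112 to satisfy the alignment.
  assert(round_up_to(100, BIAS_NUM_ALIGNMENT) == 112);
  assert(round_up_to(32, BIAS_NUM_ALIGNMENT) == 32);   // already aligned
  return 0;
}
```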
......@@ -92,13 +94,14 @@ struct ImageOutputArgs {
activation; // To select activation and specify (Leaky)Relu parameter.
};
// #ifdef PADDLE_MOBILE_FPGA_V1
struct ConvDriverParam {
uint64_t filter_per_group;
uint64_t channel_per_group;
uint64_t image_one_pad_per_row;
uint64_t deconv_param;
// new
uint64_t col_padding_up;
uint64_t col_padding_down;
uint64_t row_padding_up;
......@@ -108,39 +111,49 @@ struct ConvDriverParam {
uint64_t filter_pad_width_mul_channel;
uint64_t image_win_cnt;
uint64_t image_win_cnt_last;
uint64_t filter_row;
uint64_t filter_width;
uint64_t filter_height;
uint64_t skip_window;
uint64_t stride_h;
uint64_t filter_amount_all;
uint64_t prog_full_cnt;
uint64_t filter_align;
uint64_t filter_num;
uint64_t output_width;
uint64_t output_amount_per_row;
uint64_t res_row_data_align4_pad;
uint64_t cal_res_num;
uint64_t last_cal_res_row_num;
uint64_t post_prog_full_cnt;
uint64_t deconv_skip_row; // paralvl*deconv_group
uint64_t deconv_res_skip_row; // deconv_group * result_amount_per_row
uint64_t deconv_ena;
uint64_t deconv_dump;
uint64_t output_address_phy;
uint64_t output_height;
uint64_t result_amount_per_row_multi_para;
uint64_t sb_address_phy;
uint64_t fpga_bias_scale_len;
uint64_t filter_amount_whole;
uint64_t filter_address_phy;
uint64_t filters_amount_whole;
uint64_t image_address_phy;
uint64_t image_hight;
uint64_t image_amount_per_row;
uint64_t image_amount_per_row_multi_win_first;
uint64_t image_amount_per_row_multi_win;
uint64_t filter_pad_hight;
uint64_t image_block_num;
uint64_t image_block_len;
uint64_t image_block_len_last;
......@@ -178,6 +191,7 @@ struct ConvArgs {
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct DeconvTxParm deconv_tx_param;
struct ConvDriverParam driver;
};
......@@ -242,6 +256,7 @@ struct EWAddArgs {
struct ImageInputArgs image0;
struct ImageInputArgs image1;
struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct EWAddDriverParam driver;
};
......@@ -287,6 +302,8 @@ struct DWDeconvArgs {
std::vector<std::shared_ptr<char>> vector_dw_conv_space;
};
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// }
static inline uint32_t align_to_x(int64_t num, int64_t x) {
return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x;
}
......
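On align_to_x above: the new body is the usual round-up-to-a-multiple formula, with the intermediate arithmetic forced to uint32_t; for the positive sizes this code handles (and as long as num + x fits in 32 bits) it returns the same value as the commented-out int version. A small, self-contained usage sketch (the call sites shown are hypothetical):

```cpp
#include <cassert>
#include <cstdint>

static inline uint32_t align_to_x(int64_t num, int64_t x) {
  return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x;
}

int main() {
  // Hypothetical call sites: pad a row of 300 elements up to a 32-wide boundary,
  // and leave an already-aligned size unchanged.
  assert(align_to_x(300, 32) == 320);
  assert(align_to_x(64, 32) == 64);
  return 0;
}
```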
......@@ -13,7 +13,9 @@
// limitations under the License.
#include "io/api_paddle_mobile.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "framework/tensor.h"
......@@ -172,6 +174,14 @@ void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
}
}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(PaddleTensor *output,
int id) {
std::shared_ptr<framework::Tensor> tensor_ptr =
paddle_mobile_->FetchResult(id);
ConvertTensors(*(tensor_ptr.get()), output);
return;
}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name,
PaddleTensor *output) {
......
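The new FetchPaddleTensors(PaddleTensor*, int) overload fetches a single result tensor by id instead of materialising every output. A hypothetical caller (only the overload itself comes from this commit; the surrounding function and the header path are assumptions):

```cpp
#include "io/paddle_inference_api.h"  // assumed location of PaddlePredictor / PaddleTensor

// Illustrative caller of the new overload; everything except the overload's
// signature is an assumption for the sketch.
void FetchOnlyFirstOutput(PaddlePredictor* predictor) {
  PaddleTensor output;
  // Pull just the result tensor with id 0 after prediction has run.
  predictor->FetchPaddleTensors(&output, /*id=*/0);
  // `output` now holds the converted result for that id.
}
```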
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
......@@ -36,6 +36,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
void Predict_From_To(int start, int end) override;
void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
void FetchPaddleTensors(PaddleTensor* outputs, int id) override;
void GetPaddleTensor(const std::string& name, PaddleTensor* output) override;
#endif
......
......@@ -137,6 +137,7 @@ class PaddlePredictor {
virtual void Predict_From_To(int start, int end) = 0;
virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
virtual void FetchPaddleTensors(PaddleTensor* outputs, int id) = 0;
virtual void GetPaddleTensor(const std::string& name,
PaddleTensor* output) = 0;
#endif
......
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ANCHOR_GENERATOR_OP
#include <string.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "operators/kernel/detection_kernel.h"
......@@ -39,9 +39,10 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46};
int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33,
-44, 49, 60, -2, -2, 18, 18, -10, -14,
26, 30, -14, -22, 30, 38, -9, -26, 25,
42, -92, -92, 108, 108, -2, -15, 18, 31};
if (offset > 0.6) {
memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
......
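The hunk above swaps the zero-based anchors_offset2 table for a centred one; each group of four entries reads like an (xmin, ymin, xmax, ymax) offset around the anchor point. How such a table would expand into boxes at one feature-map cell is sketched below (the stride, the cell position, and the field interpretation are assumptions for illustration, not taken from the kernel):

```cpp
#include <cstdio>

// Hypothetical decoding of the offset table; field meanings are an assumption.
// Each group of four entries is read as (xmin, ymin, xmax, ymax) relative to
// an anchor point on the feature map.
int main() {
  int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33,
                           -44, 49, 60, -2, -2, 18, 18, -10, -14,
                           26, 30, -14, -22, 30, 38, -9, -26, 25,
                           42, -92, -92, 108, 108, -2, -15, 18, 31};
  const int stride = 16;     // assumed feature-map stride in input pixels
  const int cx = 5, cy = 3;  // an example cell on the feature map
  const int num_anchors = sizeof(anchors_offset2) / sizeof(int) / 4;
  for (int i = 0; i < num_anchors; ++i) {
    int xmin = cx * stride + anchors_offset2[4 * i + 0];
    int ymin = cy * stride + anchors_offset2[4 * i + 1];
    int xmax = cx * stride + anchors_offset2[4 * i + 2];
    int ymax = cy * stride + anchors_offset2[4 * i + 3];
    std::printf("anchor %d: (%d, %d, %d, %d)\n", i, xmin, ymin, xmax, ymax);
  }
  return 0;
}
```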