anchor_generator+version

7bc50fb6 · jameswu2014 · c70012f4 · 7bc50fb6 · 7bc50fb6 · 7bc50fb6
4 changed files
--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
  //      framework::make_ddim({num, 1, height, width});
  //  filter_tensor->Resize(dims_new);
  filter_tensor->reset_data_ptr(new_data);
-  filter_tensor->set_type(typeid(int8_t));
+  filter_tensor->set_type(typeid(int16_t));
 }

 void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {

--- a/src/fpga/V1/pe.cpp
+++ b/src/fpga/V1/pe.cpp
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "fpga/V1/image.h"
 #include "fpga/common/config.h"
 #include "fpga/common/driver.h"
+#include "fpga/common/fpga_common.h"
 #ifdef COST_TIME_PRINT
 #include <sys/time.h>
 #include <time.h>
@@ -253,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) {
  reg_writeq(
      ((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32),
      REG_CONV_FILTER_PIXEL);
-  reg_writeq(args.driver.output_height | (args.driver.output_width << 32),
+
+  uint64_t output_height_fraction =
+      args.driver.output_height / ROW_PARALLEL_NUM;
+  uint64_t output_height_remainder =
+      args.driver.output_height % ROW_PARALLEL_NUM;
+  reg_writeq(args.driver.output_height | (output_height_fraction << 16) |
+                 (output_height_remainder << 26) |
+                 (args.driver.output_width << 32),
             REG_CONV_RESULT_PIXEL);
  reg_writeq(((uint64_t)args.image.pad_height) |
                 (((uint64_t)args.image.pad_width) << 32),

--- a/src/fpga/common/fpga_common.cpp
+++ b/src/fpga/common/fpga_common.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "fpga/common/fpga_common.h"
 #include <algorithm>
 #include <map>
+#include <utility>
 #include "fpga/common/config.h"
 #include "fpga/common/driver.h"

@@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) {
 }

 uint32_t paddle_mobile_version() {
-  uint32_t v_master = 34;
-  uint32_t v_slave = 34;
+  uint32_t v_master = 35;
+  uint32_t v_slave = 35;

  uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2;
  uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master;

--- a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp
@@ -14,6 +14,9 @@ limitations under the License. */

 #ifdef ANCHOR_GENERATOR_OP

+#include <string.h>
+#include <iostream>
+#include <utility>
 #include <vector>
 #include "operators/kernel/detection_kernel.h"

@@ -29,11 +32,23 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
  auto stride = param->stride_;
  auto feature_width = input->dims()[3], feature_height = input->dims()[2];
  auto stride_width = stride[0], stride_height = stride[1];
+  auto offset = param->offset_;

  int anchors_offset[] = {-2,  -2,   18,   18,  -10, -9,   26,   25,   -23,
                          -20, 39,   36,   -43, -34, 59,   49,   -63,  -54,
                          79,  69,   -96,  -77, 112, 93,   -137, -118, 153,
                          134, -204, -188, 220, 204, -281, -395, 296,  441};
+
+  int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
+                           0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
+                           0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46};
+
+  if (offset > 0.6) {
+    memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
+    std::cout << "anchor generator marker" << std::endl;
+  } else {
+    std::cout << "anchor generator rfcn" << std::endl;
+  }
  int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4);

  //  DLOG << "feature_height: " << feature_height;