diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index 9d1505dba462921f8155d239eea21c7ed48e050e..4ad252d41420442844aee25dddd7dcbac22aef2d 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, // framework::make_ddim({num, 1, height, width}); // filter_tensor->Resize(dims_new); filter_tensor->reset_data_ptr(new_data); - filter_tensor->set_type(typeid(int8_t)); + filter_tensor->set_type(typeid(int16_t)); } void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp index e5fe494006c7a2d191baf3bb6a83370d9935f036..19bbcd22d3c1c29eb51d7b8da9a7923ff8fe387b 100644 --- a/src/fpga/V1/pe.cpp +++ b/src/fpga/V1/pe.cpp @@ -19,6 +19,7 @@ limitations under the License. */ #include "fpga/V1/image.h" #include "fpga/common/config.h" #include "fpga/common/driver.h" +#include "fpga/common/fpga_common.h" #ifdef COST_TIME_PRINT #include #include @@ -253,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) { reg_writeq( ((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32), REG_CONV_FILTER_PIXEL); - reg_writeq(args.driver.output_height | (args.driver.output_width << 32), + + uint64_t output_height_fraction = + args.driver.output_height / ROW_PARALLEL_NUM; + uint64_t output_height_remainder = + args.driver.output_height % ROW_PARALLEL_NUM; + reg_writeq(args.driver.output_height | (output_height_fraction << 16) | + (output_height_remainder << 26) | + (args.driver.output_width << 32), REG_CONV_RESULT_PIXEL); reg_writeq(((uint64_t)args.image.pad_height) | (((uint64_t)args.image.pad_width) << 32), diff --git a/src/fpga/common/fpga_common.cpp b/src/fpga/common/fpga_common.cpp index 06b0b365bdde87cd9940315382572533987b263c..57bd162f02566ccb7b4cb5efa54c245abc51c350 100644 --- a/src/fpga/common/fpga_common.cpp +++ b/src/fpga/common/fpga_common.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #include "fpga/common/fpga_common.h" #include #include +#include #include "fpga/common/config.h" #include "fpga/common/driver.h" @@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) { } uint32_t paddle_mobile_version() { - uint32_t v_master = 34; - uint32_t v_slave = 34; + uint32_t v_master = 35; + uint32_t v_slave = 35; uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2; uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master; diff --git a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp index 359c34b0cefa20ee13789402c87c8f13ca31cc50..6046b3d2f0a4a1d273d31aac079244ce3ec3703a 100644 --- a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp +++ b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp @@ -14,6 +14,9 @@ limitations under the License. */ #ifdef ANCHOR_GENERATOR_OP +#include +#include +#include #include #include "operators/kernel/detection_kernel.h" @@ -29,11 +32,23 @@ bool AnchorGeneratorKernel::Init( auto stride = param->stride_; auto feature_width = input->dims()[3], feature_height = input->dims()[2]; auto stride_width = stride[0], stride_height = stride[1]; + auto offset = param->offset_; int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23, -20, 39, 36, -43, -34, 59, 49, -63, -54, 79, 69, -96, -77, 112, 93, -137, -118, 153, 134, -204, -188, 220, 204, -281, -395, 296, 441}; + + int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103, + 0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58, + 0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46}; + + if (offset > 0.6) { + memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset)); + std::cout << "anchor generator marker" << std::endl; + } else { + std::cout << "anchor generator rfcn" << std::endl; + } int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4); // DLOG << "feature_height: " << feature_height;