diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index 02db327cb3c261b31a80375b8b2062405a072c3e..4ad252d41420442844aee25dddd7dcbac22aef2d 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, // framework::make_ddim({num, 1, height, width}); // filter_tensor->Resize(dims_new); filter_tensor->reset_data_ptr(new_data); - filter_tensor->set_type(typeid(int8_t)); + filter_tensor->set_type(typeid(int16_t)); } void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { @@ -466,24 +466,9 @@ void expand_EW_arg(EWAddArgs *arg) { uint64_t image_amount_per_row = align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels, IMAGE_ALIGNMENT); - ////////////////////////////////////////////////////////// - // temporary modify for EW and DMA problem - uint64_t image_image_pixel = 0; - if ((args.image0.width * args.image0.channels) >= 24576) { - if ((args.image0.width * args.image0.channels) % 32 != 0) { - DLOG << "EW parameter can not be support"; - } else { - image_amount_per_row = image_amount_per_row / 2; - image_image_pixel = ((uint64_t)args.image0.channels << 32) | - ((uint64_t)(args.image0.width / 2) << 16) | - (uint64_t)(args.image0.height * 2); - } - } else { - image_image_pixel = ((uint64_t)args.image0.channels << 32) | - ((uint64_t)args.image0.width << 16) | - (uint64_t)args.image0.height; - } - ////////////////////////////////////////////////////////// + uint64_t image_image_pixel = ((uint64_t)args.image0.channels << 32) | + ((uint64_t)args.image0.width << 16) | + (uint64_t)args.image0.height; (*arg).driver.image0_address_phy = image0_address_phy; (*arg).driver.image1_address_phy = image1_address_phy; diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp index 24ef95e6fc25b32a2faf69c7e685b5c1f07d1cdd..19bbcd22d3c1c29eb51d7b8da9a7923ff8fe387b 100644 --- a/src/fpga/V1/pe.cpp +++ b/src/fpga/V1/pe.cpp @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "fpga/common/pe.h" +#include "common/enforce.h" #include "common/types.h" #include "fpga/V1/filter.h" #include "fpga/V1/image.h" #include "fpga/common/config.h" #include "fpga/common/driver.h" +#include "fpga/common/fpga_common.h" #ifdef COST_TIME_PRINT #include #include @@ -252,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) { reg_writeq( ((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32), REG_CONV_FILTER_PIXEL); - reg_writeq(args.driver.output_height | (args.driver.output_width << 32), + + uint64_t output_height_fraction = + args.driver.output_height / ROW_PARALLEL_NUM; + uint64_t output_height_remainder = + args.driver.output_height % ROW_PARALLEL_NUM; + reg_writeq(args.driver.output_height | (output_height_fraction << 16) | + (output_height_remainder << 26) | + (args.driver.output_width << 32), REG_CONV_RESULT_PIXEL); reg_writeq(((uint64_t)args.image.pad_height) | (((uint64_t)args.image.pad_width) << 32), @@ -296,6 +305,7 @@ int ComputeBasicConv(const struct ConvArgs &args) { g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR; ret = -EIO; DLOG << "Conv Wait Irq Timeout!"; + PADDLE_MOBILE_ENFORCE(0, "Conv Wait Irq Timeout"); } output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); @@ -447,6 +457,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR; ret = -EIO; DLOG << "Pooling Wait Irq Timeout!"; + PADDLE_MOBILE_ENFORCE(0, "Pooling Wait Irq Timeout!"); } DLOG << "after reg poll"; @@ -529,6 +540,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { g_fpgainfo.pe_data->pes[PE_IDX_EW]->status = ERROR; ret = -EIO; DLOG << "EW Wait Irq Timeout!"; + PADDLE_MOBILE_ENFORCE(0, "EW Wait Irq Timeout!"); } output_scale = reg_readq(REG_SCALE_PARAMETER); @@ -561,6 +573,7 @@ int PerformBypass(const struct BypassArgs &args) { << " out_scale_address:" << args.output.scale_address; #endif #ifdef PADDLE_MOBILE_ZU5 + uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); uint64_t output_scale = 0; uint64_t timer_cnt = 0; uint64_t cmd = 0; @@ -666,12 +679,12 @@ int PerformBypass(const struct BypassArgs &args) { reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR); reg_writeq(datalen, REG_CONVERT_LENGTH); reg_writeq(cmd, REG_CONVERT_CMD); - DLOG << "before reg poll"; if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_BYPASS, PE_IRQ_TIMEOUT)) { g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status = ERROR; ret = -EIO; DLOG << "BYPASS Wait Irq Timeout!"; + PADDLE_MOBILE_ENFORCE(0, "BYPASS Wait Irq Timeout!"); } DLOG << "after reg poll"; @@ -1052,6 +1065,7 @@ int ComputeDWConv(const struct DWconvArgs &args) { g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR; ret = -EIO; DLOG << "Pooling Wait Irq Timeout!"; + PADDLE_MOBILE_ENFORCE(0, "DWConv Wait Irq Timeout"); } DLOG << "after reg poll"; diff --git a/src/fpga/common/driver.cpp b/src/fpga/common/driver.cpp index 0774cab71e99ce28987e922e22d46ab9a63b1a93..508f95f0bf175e270cc8ec49f1a7d8fb7ddfb977 100644 --- a/src/fpga/common/driver.cpp +++ b/src/fpga/common/driver.cpp @@ -51,11 +51,7 @@ int open_memdevice() { return g_fpgainfo.fd_mem; } -void pl_reset() { - // DLOG << "PL RESET"; - - usleep(100 * 1000); -} +void pl_reset() { usleep(100 * 1000); } void setup_pe(struct pe_data_s *pe_data, struct fpga_pe *pe, char const *type_name, int pe_idx) { @@ -77,7 +73,7 @@ void pl_init() { pe_data = (struct pe_data_s *)malloc(sizeof(struct pe_data_s)); if (pe_data == nullptr) { - DLOG << "pe_data malloc error!"; + std::cout << "pe_data malloc error!" << std::endl; return; } memset(pe_data, 0, sizeof(struct pe_data_s)); @@ -165,7 +161,7 @@ uint64_t vaddr_to_paddr_driver(void *address) { if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) { paddr = iter->second; } else { - DLOG << "Invalid pointer: " << address; + std::cout << "Invalid pointer: " << address << std::endl; } return paddr; @@ -191,7 +187,7 @@ void *fpga_reg_free(void *ptr) { g_fpgainfo.fpga_addr2size_map.erase(iter); munmap(ptr, size); } else { - DLOG << "Invalid pointer" << ptr; + std::cout << "Invalid pointer" << ptr << std::endl; } } @@ -205,9 +201,6 @@ void *fpga_malloc_driver(size_t size) { int i = 0; struct MemoryVM2PHYArgs args; struct MemoryCacheArgs args_c; - - // memory_request(g_fpgainfo.memory_info, size, &phy_addr); - ret = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, g_fpgainfo.fd_mem, FPGA_MEM_PHY_ADDR); PADDLE_MOBILE_ENFORCE(ret != (void *)-1, "Should not be -1"); @@ -233,16 +226,12 @@ void fpga_free_driver(void *ptr) { size = iter->second; g_fpgainfo.fpga_addr2size_map.erase(iter); munmap(ptr, size); - - // p_addr = vaddr_to_paddr_driver(ptr); - // pos = (p_addr - g_fpgainfo.memory_info->mem_start) / FPGA_PAGE_SIZE; - auto iter = g_fpgainfo.fpga_vaddr2paddr_map.find(ptr); if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) { g_fpgainfo.fpga_vaddr2paddr_map.erase(iter); } } else { - DLOG << "Invalid pointer" << ptr; + std::cout << "Invalid pointer" << ptr << std::endl; } } @@ -295,10 +284,7 @@ int open_device_driver() { g_fpgainfo.FpgaRegVirAddr = (uint64_t *)fpga_reg_malloc(FPGA_REG_SIZE); // NOLINT - // fpga_memory_add(); - pl_init(); - return ret; } @@ -306,7 +292,6 @@ int close_device_driver() { pl_destroy(); fpga_reg_free(g_fpgainfo.FpgaRegVirAddr); memory_release(g_fpgainfo.memory_info); - return 0; } diff --git a/src/fpga/common/fpga_common.cpp b/src/fpga/common/fpga_common.cpp index 06b0b365bdde87cd9940315382572533987b263c..57bd162f02566ccb7b4cb5efa54c245abc51c350 100644 --- a/src/fpga/common/fpga_common.cpp +++ b/src/fpga/common/fpga_common.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #include "fpga/common/fpga_common.h" #include #include +#include #include "fpga/common/config.h" #include "fpga/common/driver.h" @@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) { } uint32_t paddle_mobile_version() { - uint32_t v_master = 34; - uint32_t v_slave = 34; + uint32_t v_master = 35; + uint32_t v_slave = 35; uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2; uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master; diff --git a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp index 359c34b0cefa20ee13789402c87c8f13ca31cc50..6046b3d2f0a4a1d273d31aac079244ce3ec3703a 100644 --- a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp +++ b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp @@ -14,6 +14,9 @@ limitations under the License. */ #ifdef ANCHOR_GENERATOR_OP +#include +#include +#include #include #include "operators/kernel/detection_kernel.h" @@ -29,11 +32,23 @@ bool AnchorGeneratorKernel::Init( auto stride = param->stride_; auto feature_width = input->dims()[3], feature_height = input->dims()[2]; auto stride_width = stride[0], stride_height = stride[1]; + auto offset = param->offset_; int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23, -20, 39, 36, -43, -34, 59, 49, -63, -54, 79, 69, -96, -77, 112, 93, -137, -118, 153, 134, -204, -188, 220, 204, -281, -395, 296, 441}; + + int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103, + 0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58, + 0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46}; + + if (offset > 0.6) { + memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset)); + std::cout << "anchor generator marker" << std::endl; + } else { + std::cout << "anchor generator rfcn" << std::endl; + } int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4); // DLOG << "feature_height: " << feature_height;