提交 2852680a 编写于 作者: H hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into backup

...@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, ...@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
// framework::make_ddim({num, 1, height, width}); // framework::make_ddim({num, 1, height, width});
// filter_tensor->Resize(dims_new); // filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int8_t)); filter_tensor->set_type(typeid(int16_t));
} }
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
...@@ -466,24 +466,9 @@ void expand_EW_arg(EWAddArgs *arg) { ...@@ -466,24 +466,9 @@ void expand_EW_arg(EWAddArgs *arg) {
uint64_t image_amount_per_row = uint64_t image_amount_per_row =
align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels, align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels,
IMAGE_ALIGNMENT); IMAGE_ALIGNMENT);
////////////////////////////////////////////////////////// uint64_t image_image_pixel = ((uint64_t)args.image0.channels << 32) |
// temporary modify for EW and DMA problem
uint64_t image_image_pixel = 0;
if ((args.image0.width * args.image0.channels) >= 24576) {
if ((args.image0.width * args.image0.channels) % 32 != 0) {
DLOG << "EW parameter can not be support";
} else {
image_amount_per_row = image_amount_per_row / 2;
image_image_pixel = ((uint64_t)args.image0.channels << 32) |
((uint64_t)(args.image0.width / 2) << 16) |
(uint64_t)(args.image0.height * 2);
}
} else {
image_image_pixel = ((uint64_t)args.image0.channels << 32) |
((uint64_t)args.image0.width << 16) | ((uint64_t)args.image0.width << 16) |
(uint64_t)args.image0.height; (uint64_t)args.image0.height;
}
//////////////////////////////////////////////////////////
(*arg).driver.image0_address_phy = image0_address_phy; (*arg).driver.image0_address_phy = image0_address_phy;
(*arg).driver.image1_address_phy = image1_address_phy; (*arg).driver.image1_address_phy = image1_address_phy;
......
...@@ -13,11 +13,13 @@ See the License for the specific language governing permissions and ...@@ -13,11 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "fpga/common/pe.h" #include "fpga/common/pe.h"
#include "common/enforce.h"
#include "common/types.h" #include "common/types.h"
#include "fpga/V1/filter.h" #include "fpga/V1/filter.h"
#include "fpga/V1/image.h" #include "fpga/V1/image.h"
#include "fpga/common/config.h" #include "fpga/common/config.h"
#include "fpga/common/driver.h" #include "fpga/common/driver.h"
#include "fpga/common/fpga_common.h"
#ifdef COST_TIME_PRINT #ifdef COST_TIME_PRINT
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
...@@ -252,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -252,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) {
reg_writeq( reg_writeq(
((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32), ((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32),
REG_CONV_FILTER_PIXEL); REG_CONV_FILTER_PIXEL);
reg_writeq(args.driver.output_height | (args.driver.output_width << 32),
uint64_t output_height_fraction =
args.driver.output_height / ROW_PARALLEL_NUM;
uint64_t output_height_remainder =
args.driver.output_height % ROW_PARALLEL_NUM;
reg_writeq(args.driver.output_height | (output_height_fraction << 16) |
(output_height_remainder << 26) |
(args.driver.output_width << 32),
REG_CONV_RESULT_PIXEL); REG_CONV_RESULT_PIXEL);
reg_writeq(((uint64_t)args.image.pad_height) | reg_writeq(((uint64_t)args.image.pad_height) |
(((uint64_t)args.image.pad_width) << 32), (((uint64_t)args.image.pad_width) << 32),
...@@ -296,6 +305,7 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -296,6 +305,7 @@ int ComputeBasicConv(const struct ConvArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR; g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
ret = -EIO; ret = -EIO;
DLOG << "Conv Wait Irq Timeout!"; DLOG << "Conv Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "Conv Wait Irq Timeout");
} }
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
...@@ -447,6 +457,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -447,6 +457,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR; g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR;
ret = -EIO; ret = -EIO;
DLOG << "Pooling Wait Irq Timeout!"; DLOG << "Pooling Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "Pooling Wait Irq Timeout!");
} }
DLOG << "after reg poll"; DLOG << "after reg poll";
...@@ -529,6 +540,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { ...@@ -529,6 +540,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_EW]->status = ERROR; g_fpgainfo.pe_data->pes[PE_IDX_EW]->status = ERROR;
ret = -EIO; ret = -EIO;
DLOG << "EW Wait Irq Timeout!"; DLOG << "EW Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "EW Wait Irq Timeout!");
} }
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
...@@ -561,6 +573,7 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -561,6 +573,7 @@ int PerformBypass(const struct BypassArgs &args) {
<< " out_scale_address:" << args.output.scale_address; << " out_scale_address:" << args.output.scale_address;
#endif #endif
#ifdef PADDLE_MOBILE_ZU5 #ifdef PADDLE_MOBILE_ZU5
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
uint64_t output_scale = 0; uint64_t output_scale = 0;
uint64_t timer_cnt = 0; uint64_t timer_cnt = 0;
uint64_t cmd = 0; uint64_t cmd = 0;
...@@ -666,12 +679,12 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -666,12 +679,12 @@ int PerformBypass(const struct BypassArgs &args) {
reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR); reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
reg_writeq(datalen, REG_CONVERT_LENGTH); reg_writeq(datalen, REG_CONVERT_LENGTH);
reg_writeq(cmd, REG_CONVERT_CMD); reg_writeq(cmd, REG_CONVERT_CMD);
DLOG << "before reg poll"; DLOG << "before reg poll";
if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_BYPASS, PE_IRQ_TIMEOUT)) { if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_BYPASS, PE_IRQ_TIMEOUT)) {
g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status = ERROR; g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status = ERROR;
ret = -EIO; ret = -EIO;
DLOG << "BYPASS Wait Irq Timeout!"; DLOG << "BYPASS Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "BYPASS Wait Irq Timeout!");
} }
DLOG << "after reg poll"; DLOG << "after reg poll";
...@@ -1052,6 +1065,7 @@ int ComputeDWConv(const struct DWconvArgs &args) { ...@@ -1052,6 +1065,7 @@ int ComputeDWConv(const struct DWconvArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR; g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR;
ret = -EIO; ret = -EIO;
DLOG << "Pooling Wait Irq Timeout!"; DLOG << "Pooling Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "DWConv Wait Irq Timeout");
} }
DLOG << "after reg poll"; DLOG << "after reg poll";
......
...@@ -51,11 +51,7 @@ int open_memdevice() { ...@@ -51,11 +51,7 @@ int open_memdevice() {
return g_fpgainfo.fd_mem; return g_fpgainfo.fd_mem;
} }
void pl_reset() { void pl_reset() { usleep(100 * 1000); }
// DLOG << "PL RESET";
usleep(100 * 1000);
}
void setup_pe(struct pe_data_s *pe_data, struct fpga_pe *pe, void setup_pe(struct pe_data_s *pe_data, struct fpga_pe *pe,
char const *type_name, int pe_idx) { char const *type_name, int pe_idx) {
...@@ -77,7 +73,7 @@ void pl_init() { ...@@ -77,7 +73,7 @@ void pl_init() {
pe_data = (struct pe_data_s *)malloc(sizeof(struct pe_data_s)); pe_data = (struct pe_data_s *)malloc(sizeof(struct pe_data_s));
if (pe_data == nullptr) { if (pe_data == nullptr) {
DLOG << "pe_data malloc error!"; std::cout << "pe_data malloc error!" << std::endl;
return; return;
} }
memset(pe_data, 0, sizeof(struct pe_data_s)); memset(pe_data, 0, sizeof(struct pe_data_s));
...@@ -165,7 +161,7 @@ uint64_t vaddr_to_paddr_driver(void *address) { ...@@ -165,7 +161,7 @@ uint64_t vaddr_to_paddr_driver(void *address) {
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) { if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
paddr = iter->second; paddr = iter->second;
} else { } else {
DLOG << "Invalid pointer: " << address; std::cout << "Invalid pointer: " << address << std::endl;
} }
return paddr; return paddr;
...@@ -191,7 +187,7 @@ void *fpga_reg_free(void *ptr) { ...@@ -191,7 +187,7 @@ void *fpga_reg_free(void *ptr) {
g_fpgainfo.fpga_addr2size_map.erase(iter); g_fpgainfo.fpga_addr2size_map.erase(iter);
munmap(ptr, size); munmap(ptr, size);
} else { } else {
DLOG << "Invalid pointer" << ptr; std::cout << "Invalid pointer" << ptr << std::endl;
} }
} }
...@@ -205,9 +201,6 @@ void *fpga_malloc_driver(size_t size) { ...@@ -205,9 +201,6 @@ void *fpga_malloc_driver(size_t size) {
int i = 0; int i = 0;
struct MemoryVM2PHYArgs args; struct MemoryVM2PHYArgs args;
struct MemoryCacheArgs args_c; struct MemoryCacheArgs args_c;
// memory_request(g_fpgainfo.memory_info, size, &phy_addr);
ret = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, ret = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
g_fpgainfo.fd_mem, FPGA_MEM_PHY_ADDR); g_fpgainfo.fd_mem, FPGA_MEM_PHY_ADDR);
PADDLE_MOBILE_ENFORCE(ret != (void *)-1, "Should not be -1"); PADDLE_MOBILE_ENFORCE(ret != (void *)-1, "Should not be -1");
...@@ -233,16 +226,12 @@ void fpga_free_driver(void *ptr) { ...@@ -233,16 +226,12 @@ void fpga_free_driver(void *ptr) {
size = iter->second; size = iter->second;
g_fpgainfo.fpga_addr2size_map.erase(iter); g_fpgainfo.fpga_addr2size_map.erase(iter);
munmap(ptr, size); munmap(ptr, size);
// p_addr = vaddr_to_paddr_driver(ptr);
// pos = (p_addr - g_fpgainfo.memory_info->mem_start) / FPGA_PAGE_SIZE;
auto iter = g_fpgainfo.fpga_vaddr2paddr_map.find(ptr); auto iter = g_fpgainfo.fpga_vaddr2paddr_map.find(ptr);
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) { if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
g_fpgainfo.fpga_vaddr2paddr_map.erase(iter); g_fpgainfo.fpga_vaddr2paddr_map.erase(iter);
} }
} else { } else {
DLOG << "Invalid pointer" << ptr; std::cout << "Invalid pointer" << ptr << std::endl;
} }
} }
...@@ -295,10 +284,7 @@ int open_device_driver() { ...@@ -295,10 +284,7 @@ int open_device_driver() {
g_fpgainfo.FpgaRegVirAddr = g_fpgainfo.FpgaRegVirAddr =
(uint64_t *)fpga_reg_malloc(FPGA_REG_SIZE); // NOLINT (uint64_t *)fpga_reg_malloc(FPGA_REG_SIZE); // NOLINT
// fpga_memory_add();
pl_init(); pl_init();
return ret; return ret;
} }
...@@ -306,7 +292,6 @@ int close_device_driver() { ...@@ -306,7 +292,6 @@ int close_device_driver() {
pl_destroy(); pl_destroy();
fpga_reg_free(g_fpgainfo.FpgaRegVirAddr); fpga_reg_free(g_fpgainfo.FpgaRegVirAddr);
memory_release(g_fpgainfo.memory_info); memory_release(g_fpgainfo.memory_info);
return 0; return 0;
} }
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "fpga/common/fpga_common.h" #include "fpga/common/fpga_common.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <utility>
#include "fpga/common/config.h" #include "fpga/common/config.h"
#include "fpga/common/driver.h" #include "fpga/common/driver.h"
...@@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) { ...@@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) {
} }
uint32_t paddle_mobile_version() { uint32_t paddle_mobile_version() {
uint32_t v_master = 34; uint32_t v_master = 35;
uint32_t v_slave = 34; uint32_t v_slave = 35;
uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2; uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2;
uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master; uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master;
......
...@@ -14,6 +14,9 @@ limitations under the License. */ ...@@ -14,6 +14,9 @@ limitations under the License. */
#ifdef ANCHOR_GENERATOR_OP #ifdef ANCHOR_GENERATOR_OP
#include <string.h>
#include <iostream>
#include <utility>
#include <vector> #include <vector>
#include "operators/kernel/detection_kernel.h" #include "operators/kernel/detection_kernel.h"
...@@ -29,11 +32,23 @@ bool AnchorGeneratorKernel<FPGA, float>::Init( ...@@ -29,11 +32,23 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
auto stride = param->stride_; auto stride = param->stride_;
auto feature_width = input->dims()[3], feature_height = input->dims()[2]; auto feature_width = input->dims()[3], feature_height = input->dims()[2];
auto stride_width = stride[0], stride_height = stride[1]; auto stride_width = stride[0], stride_height = stride[1];
auto offset = param->offset_;
int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23, int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23,
-20, 39, 36, -43, -34, 59, 49, -63, -54, -20, 39, 36, -43, -34, 59, 49, -63, -54,
79, 69, -96, -77, 112, 93, -137, -118, 153, 79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441}; 134, -204, -188, 220, 204, -281, -395, 296, 441};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46};
if (offset > 0.6) {
memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
std::cout << "anchor generator marker" << std::endl;
} else {
std::cout << "anchor generator rfcn" << std::endl;
}
int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4); int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4);
// DLOG << "feature_height: " << feature_height; // DLOG << "feature_height: " << feature_height;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册