提交 2852680a 编写于 作者: H hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into backup

......@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
// framework::make_ddim({num, 1, height, width});
// filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int8_t));
filter_tensor->set_type(typeid(int16_t));
}
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
......@@ -466,24 +466,9 @@ void expand_EW_arg(EWAddArgs *arg) {
uint64_t image_amount_per_row =
align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels,
IMAGE_ALIGNMENT);
//////////////////////////////////////////////////////////
// temporary modify for EW and DMA problem
uint64_t image_image_pixel = 0;
if ((args.image0.width * args.image0.channels) >= 24576) {
if ((args.image0.width * args.image0.channels) % 32 != 0) {
DLOG << "EW parameter can not be support";
} else {
image_amount_per_row = image_amount_per_row / 2;
image_image_pixel = ((uint64_t)args.image0.channels << 32) |
((uint64_t)(args.image0.width / 2) << 16) |
(uint64_t)(args.image0.height * 2);
}
} else {
image_image_pixel = ((uint64_t)args.image0.channels << 32) |
((uint64_t)args.image0.width << 16) |
(uint64_t)args.image0.height;
}
//////////////////////////////////////////////////////////
uint64_t image_image_pixel = ((uint64_t)args.image0.channels << 32) |
((uint64_t)args.image0.width << 16) |
(uint64_t)args.image0.height;
(*arg).driver.image0_address_phy = image0_address_phy;
(*arg).driver.image1_address_phy = image1_address_phy;
......
......@@ -13,11 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "fpga/common/pe.h"
#include "common/enforce.h"
#include "common/types.h"
#include "fpga/V1/filter.h"
#include "fpga/V1/image.h"
#include "fpga/common/config.h"
#include "fpga/common/driver.h"
#include "fpga/common/fpga_common.h"
#ifdef COST_TIME_PRINT
#include <sys/time.h>
#include <time.h>
......@@ -252,7 +254,14 @@ int ComputeBasicConv(const struct ConvArgs &args) {
reg_writeq(
((uint64_t)args.kernel.height) | (((uint64_t)args.kernel.width) << 32),
REG_CONV_FILTER_PIXEL);
reg_writeq(args.driver.output_height | (args.driver.output_width << 32),
uint64_t output_height_fraction =
args.driver.output_height / ROW_PARALLEL_NUM;
uint64_t output_height_remainder =
args.driver.output_height % ROW_PARALLEL_NUM;
reg_writeq(args.driver.output_height | (output_height_fraction << 16) |
(output_height_remainder << 26) |
(args.driver.output_width << 32),
REG_CONV_RESULT_PIXEL);
reg_writeq(((uint64_t)args.image.pad_height) |
(((uint64_t)args.image.pad_width) << 32),
......@@ -296,6 +305,7 @@ int ComputeBasicConv(const struct ConvArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
ret = -EIO;
DLOG << "Conv Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "Conv Wait Irq Timeout");
}
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
......@@ -447,6 +457,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR;
ret = -EIO;
DLOG << "Pooling Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "Pooling Wait Irq Timeout!");
}
DLOG << "after reg poll";
......@@ -529,6 +540,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_EW]->status = ERROR;
ret = -EIO;
DLOG << "EW Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "EW Wait Irq Timeout!");
}
output_scale = reg_readq(REG_SCALE_PARAMETER);
......@@ -561,6 +573,7 @@ int PerformBypass(const struct BypassArgs &args) {
<< " out_scale_address:" << args.output.scale_address;
#endif
#ifdef PADDLE_MOBILE_ZU5
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
uint64_t output_scale = 0;
uint64_t timer_cnt = 0;
uint64_t cmd = 0;
......@@ -666,12 +679,12 @@ int PerformBypass(const struct BypassArgs &args) {
reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
reg_writeq(datalen, REG_CONVERT_LENGTH);
reg_writeq(cmd, REG_CONVERT_CMD);
DLOG << "before reg poll";
if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_BYPASS, PE_IRQ_TIMEOUT)) {
g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status = ERROR;
ret = -EIO;
DLOG << "BYPASS Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "BYPASS Wait Irq Timeout!");
}
DLOG << "after reg poll";
......@@ -1052,6 +1065,7 @@ int ComputeDWConv(const struct DWconvArgs &args) {
g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status = ERROR;
ret = -EIO;
DLOG << "Pooling Wait Irq Timeout!";
PADDLE_MOBILE_ENFORCE(0, "DWConv Wait Irq Timeout");
}
DLOG << "after reg poll";
......
......@@ -51,11 +51,7 @@ int open_memdevice() {
return g_fpgainfo.fd_mem;
}
// Suspends the calling thread for 100 ms (100 * 1000 microseconds) via
// usleep(). The visible body performs no register access; it only waits.
void pl_reset() {
  // DLOG << "PL RESET";
  const unsigned int delay_ms = 100;
  usleep(delay_ms * 1000);
}
// Blocks the caller for 100 ms by sleeping 100,000 microseconds.
// Nothing else happens in this body: no logging and no register writes.
void pl_reset() {
  const unsigned int reset_delay_us = 100 * 1000;
  usleep(reset_delay_us);
}
void setup_pe(struct pe_data_s *pe_data, struct fpga_pe *pe,
char const *type_name, int pe_idx) {
......@@ -77,7 +73,7 @@ void pl_init() {
pe_data = (struct pe_data_s *)malloc(sizeof(struct pe_data_s));
if (pe_data == nullptr) {
DLOG << "pe_data malloc error!";
std::cout << "pe_data malloc error!" << std::endl;
return;
}
memset(pe_data, 0, sizeof(struct pe_data_s));
......@@ -165,7 +161,7 @@ uint64_t vaddr_to_paddr_driver(void *address) {
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
paddr = iter->second;
} else {
DLOG << "Invalid pointer: " << address;
std::cout << "Invalid pointer: " << address << std::endl;
}
return paddr;
......@@ -191,7 +187,7 @@ void *fpga_reg_free(void *ptr) {
g_fpgainfo.fpga_addr2size_map.erase(iter);
munmap(ptr, size);
} else {
DLOG << "Invalid pointer" << ptr;
std::cout << "Invalid pointer" << ptr << std::endl;
}
}
......@@ -205,9 +201,6 @@ void *fpga_malloc_driver(size_t size) {
int i = 0;
struct MemoryVM2PHYArgs args;
struct MemoryCacheArgs args_c;
// memory_request(g_fpgainfo.memory_info, size, &phy_addr);
ret = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
g_fpgainfo.fd_mem, FPGA_MEM_PHY_ADDR);
PADDLE_MOBILE_ENFORCE(ret != (void *)-1, "Should not be -1");
......@@ -233,16 +226,12 @@ void fpga_free_driver(void *ptr) {
size = iter->second;
g_fpgainfo.fpga_addr2size_map.erase(iter);
munmap(ptr, size);
// p_addr = vaddr_to_paddr_driver(ptr);
// pos = (p_addr - g_fpgainfo.memory_info->mem_start) / FPGA_PAGE_SIZE;
auto iter = g_fpgainfo.fpga_vaddr2paddr_map.find(ptr);
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
g_fpgainfo.fpga_vaddr2paddr_map.erase(iter);
}
} else {
DLOG << "Invalid pointer" << ptr;
std::cout << "Invalid pointer" << ptr << std::endl;
}
}
......@@ -295,10 +284,7 @@ int open_device_driver() {
g_fpgainfo.FpgaRegVirAddr =
(uint64_t *)fpga_reg_malloc(FPGA_REG_SIZE); // NOLINT
// fpga_memory_add();
pl_init();
return ret;
}
......@@ -306,7 +292,6 @@ int close_device_driver() {
pl_destroy();
fpga_reg_free(g_fpgainfo.FpgaRegVirAddr);
memory_release(g_fpgainfo.memory_info);
return 0;
}
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "fpga/common/fpga_common.h"
#include <algorithm>
#include <map>
#include <utility>
#include "fpga/common/config.h"
#include "fpga/common/driver.h"
......@@ -199,8 +200,8 @@ uint64_t vaddr_to_paddr(void *address) {
}
uint32_t paddle_mobile_version() {
uint32_t v_master = 34;
uint32_t v_slave = 34;
uint32_t v_master = 35;
uint32_t v_slave = 35;
uint32_t first = 1, second = 2, fourth_master = 1, fourth_slave = 2;
uint32_t master = first << 24 | second << 16 | v_master << 8 | fourth_master;
......
......@@ -14,6 +14,9 @@ limitations under the License. */
#ifdef ANCHOR_GENERATOR_OP
#include <string.h>
#include <iostream>
#include <utility>
#include <vector>
#include "operators/kernel/detection_kernel.h"
......@@ -29,11 +32,23 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
auto stride = param->stride_;
auto feature_width = input->dims()[3], feature_height = input->dims()[2];
auto stride_width = stride[0], stride_height = stride[1];
auto offset = param->offset_;
int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23,
-20, 39, 36, -43, -34, 59, 49, -63, -54,
79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46};
if (offset > 0.6) {
memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
std::cout << "anchor generator marker" << std::endl;
} else {
std::cout << "anchor generator rfcn" << std::endl;
}
int num_anchors = sizeof(anchors_offset) / (sizeof(int) * 4);
// DLOG << "feature_height: " << feature_height;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册