提交 2c7af720 编写于 作者: qnqinan's avatar qnqinan 提交者: Jiaying Zhao

fix bugs of concat, reshape and slice op and add usleep in fpga regpoll,...

fix bugs of concat, reshape and slice op and add usleep in fpga regpoll, test=develop, close #2501 (#2502)

* update proposal and psroipool kernel file in FPGA V2 track

* update, test=develop

* update FPGA v2 pe cpp file and ew kernel files, test=develop

* fix a bug of sigmoid kernel in FPGA v2 track, test=develop

* fix bugs of concat, reshape and slice op and add usleep in fpga regpoll, test=develop

* add interrupt clear operation before op compute in FPGA V2 track, test=develop
上级 ab362576
......@@ -83,11 +83,6 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
height *
align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) *
sizeof(int8_t));
for (j = 0;
j < height * align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT);
j++) {
images_in_tmp[i][j] = (int8_t)(images_in[i][j] * Ck + 0.5);
}
}
align_each_out_area_cw =
align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT);
......@@ -102,7 +97,7 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
memcpy(
(int8_t *)image_out + tmp_channel + // NOLINT
k * align_each_out_area_cw_differ,
images_in_tmp[i] + j * channel_num[i] + k * align_each_in_area_cw,
images_in[i] + j * channel_num[i] + k * align_each_in_area_cw,
channel_num[i] * sizeof(int8_t));
tmp_channel += channel_num[i];
......@@ -110,6 +105,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
}
}
fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int8_t));
for (i = 0; i < image_num; i++) {
fpga_free(images_in_tmp[i]);
}
fpga_free(images_in_tmp);
}
void split_image(int8_t *image_in, void **images_out, int image_num,
......
......@@ -248,8 +248,8 @@ int ComputeBasicConv(const struct ConvArgs &args) {
// DLOG << " activation_type:" << active_args.activation_type
// << " leaky_relu_negative_slope:"
// << active_args.leaky_relu_negative_slope;
// DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
DLOG << " reg_ActivationArgs:";
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
ret = -EIO;
......@@ -257,6 +257,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
// reg_writeq(reg_ActivationArgs,
// REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq(output_scale, REG_SCALE_PARAMETER);
// new
reg_writeq((args.driver.row_padding_down << 45) |
(args.driver.row_padding_up << 34) |
......@@ -365,7 +369,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
uint64_t cmd = 0;
uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0;
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
image_physical_address = vaddr_to_paddr(args.image.address);
output_physical_address = vaddr_to_paddr(args.output.address);
uint64_t C_paral_64 = align_to_x((uint64_t)args.image.channels, 64);
......@@ -440,7 +444,7 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, 0x808);
reg_writeq(result_addr_row, 0x810);
reg_writeq(kernel_padding_step, 0x818);
......@@ -497,7 +501,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#endif
#ifdef PADDLE_MOBILE_ZU5
int ret = 0;
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
......@@ -534,6 +538,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
uint32_t* ew_scale = reinterpret_cast<uint32_t*>(&quantParam);
uint64_t ew_scale_mult_factor = (*ew_scale) |
((uint64_t)args.const0 << 32) | ((uint64_t)args.const1 << 40);
reg_writeq(0ul, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, 0x808);
reg_writeq(result_addr_row, 0x810);
reg_writeq(kernel_padding_step, 0x818);
......@@ -928,6 +933,7 @@ int ComputeDWConv(const struct DWconvArgs &args) {
#endif
#ifdef PADDLE_MOBILE_ZU5
DLOG << "DWConv";
uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT);
// return 0;
uint64_t timer_cnt = 0;
int ret = 0;
......@@ -1011,7 +1017,7 @@ int ComputeDWConv(const struct DWconvArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(0ul, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, 0x808);
reg_writeq(result_addr_row, 0x810);
reg_writeq(kernel_padding_step, 0x818);
......
......@@ -134,9 +134,9 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
uint64_t i = 0;
/*timeout精确性待确认*/
int64_t timeout = time * 6;
usleep(1);
for (i = 0; i < timeout; i++) {
usleep(1);
if (val == reg_readq(reg)) {
break;
}
......
......@@ -211,6 +211,7 @@ struct ConcatArgs {
uint32_t out_channel;
uint32_t height;
uint32_t width;
std::vector<std::shared_ptr<char>> vector_concat_space;
};
struct SplitConvArgs {
......
......@@ -53,6 +53,15 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
concatArgs.channel_num = channel_num;
concatArgs.height = height;
concatArgs.width = width;
auto deleter = [](void *p) { fpga::fpga_free(p); };
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.images_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.scales_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.channel_num), deleter));
param->SetFpgaArgs(concatArgs);
return true;
}
......
......@@ -110,7 +110,27 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
}
}
output->Resize(framework::make_ddim(shape));
bool reshapeNeedFlg = 1;
if (output->dims() == input->dims()) {
reshapeNeedFlg = 0;
} else if (output->dims().size() != input->dims().size()) {
auto inputdimsize = input->dims().size();
auto outputdimsize = output->dims().size();
int smallersize =
inputdimsize > outputdimsize ? outputdimsize : inputdimsize;
int i = 0;
for (i = 0; i < smallersize; i++) {
if ((input->dims())[i] != (output->dims())[i])
break;
}
if (i == smallersize) {
reshapeNeedFlg = 0;
}
}
if (reshapeNeedFlg) {
reshape(input, output);
} else {
DLOG << "No need to reshape";
output->ShareDataWith(*input);
framework::LoD lod = input->lod();
......@@ -118,9 +138,6 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
output->scale[0] = input->scale[0];
return;
}
reshape(input, output);
//
}
} // namespace operators
......
......@@ -30,6 +30,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
}
return true;
}
template <>
void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
// Only support slicing in channel dimension
......@@ -38,6 +39,8 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
auto input = param.input_;
auto output = param.output_;
int H = input->dims()[2];
int W = input->dims()[3];
int HW = input->dims()[2] * input->dims()[3];
int channel = input->dims()[1];
auto input_ptr = input->data<int8_t>();
......@@ -53,10 +56,32 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
end = end > channel ? channel : end;
int len = end - start;
size_t size = len * sizeof(int8_t);
DLOG << input->fpga_data_num;
fpga::fpga_invalidate(input_ptr, input->fpga_data_num*sizeof(int8_t));
DLOG << output->fpga_data_num;
fpga::fpga_invalidate(output_ptr, output->fpga_data_num*sizeof(int8_t));
int unalignedWC = len * W;
int alignedWC = fpga::align_to_x(W * len, IMAGE_ALIGNMENT);
for (int i = 0; i < HW; i++) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size);
if (unalignedWC != alignedWC) {
auto tmpOutput = reinterpret_cast<int8_t*>
(fpga::fpga_malloc(len*HW * sizeof(int8_t)));
for (int i = 0; i < HW; i++) {
memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size);
}
for (int i = 0; i < H; i++) {
for (int j = 0; j < unalignedWC; j++) {
*(output_ptr + alignedWC * i + j) =
*(tmpOutput + unalignedWC * i + j);
}
}
fpga::fpga_free(tmpOutput);
} else {
for (int i = 0; i < HW; i++) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size);
}
}
fpga::fpga_flush(output_ptr, output->fpga_data_num*sizeof(int8_t));
}
} // namespace operators
} // namespace paddle_mobile
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册