From ca6eec789be9f3a028fac6febbe84f99dc0d133d Mon Sep 17 00:00:00 2001 From: qnqinan Date: Tue, 26 Nov 2019 15:12:21 +0800 Subject: [PATCH] fix bugs of concat, reshape and slice op and add usleep in fpga regpoll, test=develop --- mobile/src/fpga/V2/image.cpp | 4 +++ mobile/src/fpga/common/driver.cpp | 2 +- mobile/src/fpga/common/fpga_common.h | 1 + .../kernel/fpga/V2/concat_kernel.cpp | 9 ++++++ .../kernel/fpga/V2/reshape2_kernel.cpp | 23 +++++++++++++-- .../operators/kernel/fpga/V2/slice_kernel.cpp | 29 +++++++++++++++++-- 6 files changed, 62 insertions(+), 6 deletions(-) mode change 100644 => 100755 mobile/src/fpga/V2/image.cpp mode change 100644 => 100755 mobile/src/fpga/common/driver.cpp mode change 100644 => 100755 mobile/src/fpga/common/fpga_common.h mode change 100644 => 100755 mobile/src/operators/kernel/fpga/V2/concat_kernel.cpp mode change 100644 => 100755 mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp mode change 100644 => 100755 mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp diff --git a/mobile/src/fpga/V2/image.cpp b/mobile/src/fpga/V2/image.cpp old mode 100644 new mode 100755 index dc3c3356e8..70436da7f1 --- a/mobile/src/fpga/V2/image.cpp +++ b/mobile/src/fpga/V2/image.cpp @@ -110,6 +110,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out, } } fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int8_t)); + for (i = 0; i < image_num; i++) { + fpga_free(images_in_tmp[i]); + } + fpga_free(images_in_tmp); } void split_image(int8_t *image_in, void **images_out, int image_num, diff --git a/mobile/src/fpga/common/driver.cpp b/mobile/src/fpga/common/driver.cpp old mode 100644 new mode 100755 index 911704965a..b7ce4d3247 --- a/mobile/src/fpga/common/driver.cpp +++ b/mobile/src/fpga/common/driver.cpp @@ -134,9 +134,9 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) { uint64_t i = 0; /*timeout精确性待确认*/ int64_t timeout = time * 6; - usleep(1); for (i = 0; i < timeout; i++) { + usleep(1); if (val == 
reg_readq(reg)) { break; } diff --git a/mobile/src/fpga/common/fpga_common.h b/mobile/src/fpga/common/fpga_common.h old mode 100644 new mode 100755 index a798d54459..a767cd2606 --- a/mobile/src/fpga/common/fpga_common.h +++ b/mobile/src/fpga/common/fpga_common.h @@ -211,6 +211,7 @@ struct ConcatArgs { uint32_t out_channel; uint32_t height; uint32_t width; + std::vector> vector_concat_space; }; struct SplitConvArgs { diff --git a/mobile/src/operators/kernel/fpga/V2/concat_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/concat_kernel.cpp old mode 100644 new mode 100755 index 716531fcab..8442eef8b2 --- a/mobile/src/operators/kernel/fpga/V2/concat_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/concat_kernel.cpp @@ -53,6 +53,15 @@ bool ConcatKernel::Init(ConcatParam *param) { concatArgs.channel_num = channel_num; concatArgs.height = height; concatArgs.width = width; + + auto deleter = [](void *p) { fpga::fpga_free(p); }; + concatArgs.vector_concat_space.push_back(std::shared_ptr( + reinterpret_cast(concatArgs.images_in), deleter)); + concatArgs.vector_concat_space.push_back(std::shared_ptr( + reinterpret_cast(concatArgs.scales_in), deleter)); + concatArgs.vector_concat_space.push_back(std::shared_ptr( + reinterpret_cast(concatArgs.channel_num), deleter)); + param->SetFpgaArgs(concatArgs); return true; } diff --git a/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp old mode 100644 new mode 100755 index fcf0889b4a..c7cd6575e4 --- a/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp @@ -110,7 +110,27 @@ void Reshape2Kernel::Compute(const Reshape2Param &param) { } } output->Resize(framework::make_ddim(shape)); + + bool reshapeNeedFlg = 1; if (output->dims() == input->dims()) { + reshapeNeedFlg = 0; + } else if (output->dims().size() != input->dims().size()) { + auto inputdimsize = input->dims().size(); + auto outputdimsize =
output->dims().size(); + int smallersize = + inputdimsize > outputdimsize ? outputdimsize : inputdimsize; + int i = 0; + for (i = 0; i < smallersize; i++) { + if ((input->dims())[i] != (output->dims())[i]) + break; + } + if (i == smallersize) { + reshapeNeedFlg = 0; + } + } + if (reshapeNeedFlg) { + reshape(input, output); + } else { DLOG << "No need to reshape"; output->ShareDataWith(*input); framework::LoD lod = input->lod(); @@ -118,9 +138,6 @@ void Reshape2Kernel::Compute(const Reshape2Param &param) { output->scale[0] = input->scale[0]; return; } - - reshape(input, output); - // } } // namespace operators diff --git a/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp old mode 100644 new mode 100755 index a1500ecdb0..d32dddb307 --- a/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp @@ -30,6 +30,7 @@ bool SliceKernel::Init(SliceParam* param) { } return true; } + template <> void SliceKernel::Compute(const SliceParam& param) { // Only support slicing in channel dimension @@ -38,6 +39,8 @@ void SliceKernel::Compute(const SliceParam& param) { auto input = param.input_; auto output = param.output_; + int H = input->dims()[2]; + int W = input->dims()[3]; int HW = input->dims()[2] * input->dims()[3]; int channel = input->dims()[1]; auto input_ptr = input->data(); @@ -53,10 +56,32 @@ void SliceKernel::Compute(const SliceParam& param) { end = end > channel ?
channel : end; int len = end - start; size_t size = len * sizeof(int8_t); + DLOG << input->fpga_data_num; + fpga::fpga_invalidate(input_ptr, input->fpga_data_num*sizeof(int8_t)); + DLOG << output->fpga_data_num; + fpga::fpga_invalidate(output_ptr, output->fpga_data_num*sizeof(int8_t)); + int unalignedWC = len * W; + int alignedWC = fpga::align_to_x(W * len, IMAGE_ALIGNMENT); - for (int i = 0; i < HW; i++) { - memcpy(output_ptr + len * i, input_ptr + i * channel + start, size); + if (unalignedWC != alignedWC) { + auto tmpOutput = reinterpret_cast + (fpga::fpga_malloc(len*HW * sizeof(int8_t))); + for (int i = 0; i < HW; i++) { + memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size); + } + for (int i = 0; i < H; i++) { + for (int j = 0; j < unalignedWC; j++) { + *(output_ptr + alignedWC * i + j) = + *(tmpOutput + unalignedWC * i + j); + } + } + fpga::fpga_free(tmpOutput); + } else { + for (int i = 0; i < HW; i++) { + memcpy(output_ptr + len * i, input_ptr + i * channel + start, size); + } } + fpga::fpga_flush(output_ptr, output->fpga_data_num*sizeof(int8_t)); } } // namespace operators } // namespace paddle_mobile -- GitLab