diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index acb48aca7065f7c5d8abc2ce84b10214e388cebe..5c960bbea7f8e65053998a29cd72d7b78f2fb97a 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -623,7 +623,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, fpga::format_fp16_ofm(out, dims_out_new); auto out_ptr = out->data(); arg->output.address = - (half *)out_ptr + // NOLINT + out_ptr + omit_size * sizeof(half) * (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT)); arg->output.scale_address = out->scale; @@ -713,7 +713,6 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, } for (int j = 0; j < split_num; ++j) { - // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type = activation_enable; arg->split_conv_args[i] @@ -759,9 +758,9 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, align_to_x(arg->split_conv_args[i]->conv_arg[j].filter_num, FILTER_NUM_ALIGNMENT) * sizeof(int8_t); - auto filter_head = &(( - int8_t *)filter_ptr)[j * element_num * filter_num_per_div + // NOLINT - i * filter_sub_conv_offset]; + auto filter_head = + &filter_ptr[j * element_num * filter_num_per_div + // NOLINT + i * filter_sub_conv_offset]; arg->split_conv_args[i]->conv_arg[j].filter_address = fpga_malloc(filter_size); arg->split_conv_args[i]->vector_conv_space.push_back( @@ -836,6 +835,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, int16_t leaky_relu_negative_slope, int stride_h, int stride_w, int padding_h, int padding_w, float *bias_ptr) { + auto deleter = [](void *p) { fpga_free(p); }; + arg->vector_dwconv_space.push_back( + std::shared_ptr(reinterpret_cast(bias_ptr), deleter)); + auto filter_ptr = filter->data(); auto input_ptr = input->data(); auto output_ptr = out->mutable_data(); diff --git a/src/fpga/V1/deconv_filter.cpp b/src/fpga/V1/deconv_filter.cpp index 7c87452f5a7264ad069d8508cb1e9dc24f5cdc3d..36a02578bca6698b510c18947d1e8463108cad8b 100644 --- a/src/fpga/V1/deconv_filter.cpp +++ b/src/fpga/V1/deconv_filter.cpp @@ -247,6 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset); fpga_free(ptr_tmp); } + fpga_free(ptr_ptr_data); *data_in = reinterpret_cast(ptr_space); /* { diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h index 826e1c7402585127d6731878737e0edd678e6a76..898e76a65425c357a00e76eaedf39c003c9603f3 100644 --- a/src/fpga/common/fpga_common.h +++ b/src/fpga/common/fpga_common.h @@ -187,6 +187,7 @@ struct SplitArgs { uint32_t* out_channel_nums; uint32_t height; uint32_t width; + std::vector> vector_split_space; }; struct PoolingArgs { @@ -236,6 +237,7 @@ struct DWconvArgs { struct KernelArgs kernel; struct ImageInputArgs image; struct ImageOutputArgs output; + std::vector> vector_dwconv_space; }; struct DWDeconvArgs { diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp index 3e41efdf76ed5b14d408a1278c7dba0bd1f30a1f..ecebe2fd91d62c29966b7726846c81b78f68ae52 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp @@ -59,8 +59,6 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { bs_ptr[i + channel] = new_scale_ptr[i]; bs_ptr[i] = new_bias_ptr[i]; } - param->SetNewScale(new_scale); - param->SetNewBias(new_bias); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; @@ -70,6 +68,9 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); + delete new_scale; + delete new_bias; + return true; } diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp index b7b99be78acae80c46b9d1bd1f3cb72d5f4a7cfb..38d469fa7054193d24faf3d0981de4d87e0d32a5 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp @@ -60,8 +60,6 @@ bool ConvAddBNReluKernel::Init( bs_ptr[i + channel] = new_scale_ptr[i]; bs_ptr[i] = new_bias_ptr[i]; } - param->SetNewScale(new_scale); - param->SetNewBias(new_bias); const int groups = param->Groups(); if (groups == channel) { @@ -71,6 +69,8 @@ bool ConvAddBNReluKernel::Init( leaky_relu_negative_slope, strides[0], strides[1], paddings[0], paddings[1], new_bias_ptr); param->SetFpgaArgs(dwconv_arg); + fpga::fpga_free(new_scale_ptr); + fpga::fpga_free(bs_ptr); } else { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; @@ -78,6 +78,8 @@ bool ConvAddBNReluKernel::Init( leaky_relu_negative_slope, param->Groups(), strides[0], strides[1], paddings[0], paddings[1], bs_ptr); param->SetFpgaArgs(conv_arg); + delete new_scale; + delete new_bias; } return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp index c4c2bf184d536ace31e52defb59e97c154386464..10ea54e380f8d9a585f03427ced1e569f0849b52 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp @@ -51,8 +51,6 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { bs_ptr[i + channel] = new_scale_ptr[i]; bs_ptr[i] = new_bias_ptr[i]; } - param->SetNewScale(new_scale); - param->SetNewBias(new_bias); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; @@ -61,6 +59,8 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); + delete new_scale; + delete new_bias; return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp index 463c90d1bb0dcd48a7b41aff73b830d14f989c73..d1adec36adc73665d2e542b14b2e368830a2202d 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp @@ -51,8 +51,6 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { bs_ptr[i + channel] = new_scale_ptr[i]; bs_ptr[i] = new_bias_ptr[i]; } - param->SetNewScale(new_scale); - param->SetNewBias(new_bias); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; @@ -61,6 +59,9 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); + + delete new_scale; + delete new_bias; return true; } diff --git a/src/operators/kernel/fpga/V1/split_kernel.cpp b/src/operators/kernel/fpga/V1/split_kernel.cpp index 35a559ae6475533978188a7f0e018b614db1415c..2aef8d018c3480e396c22ac6f6c953a1387c331d 100644 --- a/src/operators/kernel/fpga/V1/split_kernel.cpp +++ b/src/operators/kernel/fpga/V1/split_kernel.cpp @@ -43,6 +43,8 @@ bool SplitKernel::Init(SplitParam *param) { out_channels[i] = (uint32_t)sections[i]; } + auto deleter = [](void *p) { fpga::fpga_free(p); }; + fpga::SplitArgs arg = {0}; arg.image_num = image_num; arg.image_in = in->data(); @@ -52,6 +54,12 @@ bool SplitKernel::Init(SplitParam *param) { arg.out_channel_nums = out_channels; arg.height = (uint32_t)in->dims()[2]; arg.width = (uint32_t)in->dims()[3]; + arg.vector_split_space.push_back( + std::shared_ptr(reinterpret_cast(images_out), deleter)); + arg.vector_split_space.push_back( + std::shared_ptr(reinterpret_cast(scales_out), deleter)); + arg.vector_split_space.push_back( + std::shared_ptr(reinterpret_cast(out_channels), deleter)); param->SetFpgaArgs(arg); return true; diff --git a/test/fpga/test_rfcn.cpp b/test/fpga/test_rfcn.cpp index 2af521aa7f50c83aa77bb18921eb45bd06eb7490..e1d13541ef8000da18ceda4c356d158198d7b9f4 100644 --- a/test/fpga/test_rfcn.cpp +++ b/test/fpga/test_rfcn.cpp @@ -39,7 +39,7 @@ void readStream(std::string filename, uint8_t *buf) { } static const char *g_rfcn_combine = "../models/rfcn"; -const std::string g_image_src_float = "../models/rfcn/data.bin"; +static const char *g_image_src_float = "../models/rfcn/data.bin"; int main() { paddle_mobile::fpga::open_device(); paddle_mobile::PaddleMobile paddle_mobile; @@ -55,6 +55,7 @@ int main() { paddle_mobile.Predict_To(-1); paddle_mobile.GetResults(&v); DLOG << "Computation done"; + fpga::fpga_free(img); } return 0;