diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index 9d8a1c7f6780b74481d51d2c9b3097df86f6817d..a9ca711bdb95472664c64cffe886d7b5c32aae7e 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -68,26 +68,26 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(const struct ConvArgs &args) { +int ComputeFpgaConv(const struct WrapperConvArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << " relu_enabled:" << args.relu_enabled - << " sb_address:" << args.sb_address - << " filter_address:" << args.filter_address - << " filter_num:" << args.filter_num - << " group_num:" << args.group_num; - DLOG << " image_address:" << args.image.address - << " image_scale_address:" << args.image.scale_address - << " image_channels:" << args.image.channels - << " image_height:" << args.image.height - << " image_width:" << args.image.width - << " pad_height:" << args.image.pad_height - << " pad_width:" << args.image.pad_width; - DLOG << " kernel_height:" << args.kernel.height - << " kernel_width:" << args.kernel.width - << " stride_h:" << args.kernel.stride_h - << " stride_w:" << args.kernel.stride_w; - DLOG << " out_address:" << args.output.address - << " out_scale_address:" << args.output.scale_address; +/*DLOG << " relu_enabled:" << args.relu_enabled + << " sb_address:" << args.sb_address + << " filter_address:" << args.filter_address + << " filter_num:" << args.filter_num + << " group_num:" << args.group_num; +DLOG << " image_address:" << args.image.address + << " image_scale_address:" << args.image.scale_address + << " image_channels:" << args.image.channels + << " image_height:" << args.image.height + << " image_width:" << args.image.width + << " pad_height:" << args.image.pad_height + << " pad_width:" << args.image.pad_width; +DLOG << " kernel_height:" << args.kernel.height + << " kernel_width:" << args.kernel.width + << " stride_h:" << args.kernel.stride_h + << " stride_w:" << args.kernel.stride_w; +DLOG << " out_address:" << args.output.address + << " out_scale_address:" << args.output.scale_address;*/ #endif return do_ioctl(IOCTL_CONFIG_CONV, &args); @@ -178,16 +178,31 @@ float filter_find_max(framework::Tensor *filter_tensor) { auto filter_ptr = filter_tensor->data(); return filter::find_max(filter_ptr, filter_tensor->numel()); } + +int get_plit_num(framework::Tensor *filter_tensor) { + auto dims = filter_tensor->dims(); + int chw = dims[1] * dims[2] * dims[3]; + int num = dims[0]; + int div_capacity = filter::calc_division_capacity(chw); + return filter::calc_split_num(num, div_capacity); +} + int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) { auto dims = filter_tensor->dims(); - PADDLE_MOBILE_ENFORCE(dims.size() == 4 || dims.size() == 2, - "Filter order should be 4 or 2"); - int chw = dims.size() == 4 ? dims[1] * dims[2] * dims[3] : dims[1]; - int num = dims.size() == 4 ? dims[0] : dims[1]; + int chw = dims[1] * dims[2] * dims[3]; + int num = dims[0]; int div_capacity = filter::calc_division_capacity(chw); return filter::calc_num_per_div(num, group_num, div_capacity); } +int get_aligned_filter_element_num(int chw) { + return align_to_x(chw, FILTER_ELEMENT_ALIGNMENT); +} + +int get_aligned_filter_num(int num) { + return align_to_x(num, FILTER_NUM_ALIGNMENT); +} + void format_filter(framework::Tensor *filter_tensor, float max_value, int group_num) { auto dims = filter_tensor->dims(); diff --git a/src/fpga/api.h b/src/fpga/api.h index f7010e56ad4caf7bc49f5d37301009e226780da5..aa13b09178ad2e63222041c0d432d341640b3847 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -92,6 +92,14 @@ struct ConvArgs { struct ImageOutputArgs output; }; +struct WrapperConvArgs { + uint32_t split_num; + uint32_t group_num; + uint32_t filter_num; + struct ImageOutputArgs output; + struct ConvArgs* args; +}; + struct PoolingArgs { struct KernelArgs kernel; struct ImageInputArgs image; // input image; @@ -165,7 +173,7 @@ enum FPGA_ERR_TYPE { //============================== API ============================= int PerformBypass(const struct BypassArgs& args); -int ComputeFpgaConv(const struct ConvArgs& args); +int ComputeFpgaConv(const struct WrapperConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaEWAdd(const struct EWAddArgs& args); @@ -174,6 +182,10 @@ void format_image(framework::Tensor* image_tensor); void format_ofm(framework::Tensor* ofm_tensor); // only allocate memory float filter_find_max(framework::Tensor* filter_tensor); int get_element_num_per_div(framework::Tensor* filter_tensor, int group_num); +int get_plit_num(framework::Tensor* filter_tensor); +int get_aligned_filter_element_num(int chw); +int get_aligned_filter_num(int num); + void format_filter(framework::Tensor* filter_tensor, float max_value, int group_num); void format_fc_matrix(framework::Tensor* filter_tensor, float max_value, diff --git a/src/framework/dim.h b/src/framework/dim.h index 0d3e86e92289da155843e1a9959d5ea67a73c060..85e86076e1de53fa80b75f56237901da49e22eb9 100644 --- a/src/framework/dim.h +++ b/src/framework/dim.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include "common/enforce.h" namespace paddle_mobile { namespace framework { diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp index 74080e6b0541a068956f031f984eea9ac0160b2d..2803a3de456e355d1044bec6beaaa3dad8a4e312 100644 --- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp @@ -15,7 +15,6 @@ limitations under the License. */ #ifdef FUSION_CONVADDBN_OP #include "operators/kernel/conv_add_bn_kernel.h" -#include "fpga/api.h" namespace paddle_mobile { namespace operators { @@ -23,11 +22,11 @@ namespace operators { template <> bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { bool relu_enabled = false; - Tensor *input = const_cast(param->Input()); + auto *input = const_cast(param->Input()); auto input_ptr = input->data(); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); - Tensor *filter = param->Filter(); + auto *filter = const_cast(param->Filter()); Tensor *out = param->Output(); @@ -41,10 +40,10 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { "Output channel should be equal to bias number"); const int channel = out->dims()[1]; - float *bs_ptr = + auto *bs_ptr = reinterpret_cast(fpga::fpga_malloc(2 * channel * sizeof(float))); - Tensor *new_scale = new Tensor(); - Tensor *new_bias = new Tensor(); + auto *new_scale = new Tensor(); + auto *new_bias = new Tensor(); auto new_scale_ptr = new_scale->mutable_data({channel}); auto new_bias_ptr = new_bias->mutable_data({channel}); @@ -70,27 +69,42 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; - convArgs.filter_num = filter->dims()[0]; - convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_h = param->Strides()[0]; - convArgs.kernel.stride_w = param->Strides()[1]; - convArgs.kernel.height = filter->dims()[2]; - convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; - convArgs.image.channels = input->dims()[1]; - convArgs.image.height = input->dims()[2]; - convArgs.image.width = input->dims()[3]; - convArgs.image.pad_height = param->Paddings()[0]; - convArgs.image.pad_width = param->Paddings()[1]; - convArgs.image.scale_address = input->scale; - convArgs.output.address = (void *)out_ptr; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = (uint32_t)param->Groups(); + convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0]; + convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1]; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_ptr; + convArgs.args[i].image.channels = (uint32_t)input->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input->dims()[3]; + convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0]; + convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1]; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp index c5d3c7ae5da758e7ca8ae30bc9c9d7352c007260..b96e6669998287588af5fbbde27ca8a87fa30b90 100644 --- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp @@ -27,7 +27,7 @@ bool ConvAddBNReluKernel::Init( auto input_ptr = input->data(); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); - Tensor *filter = param->Filter(); + Tensor *filter = const_cast(param->Filter()); Tensor *out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); @@ -67,26 +67,43 @@ bool ConvAddBNReluKernel::Init( fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; - convArgs.filter_num = filter->dims()[0]; - convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_h = param->Strides()[0]; - convArgs.kernel.stride_w = param->Strides()[1]; - convArgs.kernel.height = filter->dims()[2]; - convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; - convArgs.image.channels = input->dims()[1]; - convArgs.image.height = input->dims()[2]; - convArgs.image.width = input->dims()[3]; - convArgs.image.pad_height = param->Paddings()[0]; - convArgs.image.pad_width = param->Paddings()[1]; - convArgs.image.scale_address = input->scale; - convArgs.output.address = (void *)out_ptr; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = (uint32_t)param->Groups(); + convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0]; + convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1]; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_ptr; + convArgs.args[i].image.channels = (uint32_t)input->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input->dims()[3]; + convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0]; + convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1]; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } + return true; return true; } diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp index 7c2e627ab2b64cec5a66b57f84ca6de448b4c37d..9bc041a4d960a1bacc74e1caffce3190b3659363 100644 --- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp @@ -26,13 +26,13 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { auto input_ptr = input->data(); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); - Tensor *filter = param->Filter(); + auto *filter = const_cast(param->Filter()); Tensor *out = param->Output(); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; - float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); for (int i = 0; i < channel; i++) { bs_ptr[i + channel] = 1; bs_ptr[i] = bias_ptr[i]; @@ -49,27 +49,42 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; - convArgs.filter_num = filter->dims()[0]; - convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_h = param->Strides()[0]; - convArgs.kernel.stride_w = param->Strides()[1]; - convArgs.kernel.height = filter->dims()[2]; - convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; - convArgs.image.channels = input->dims()[1]; - convArgs.image.height = input->dims()[2]; - convArgs.image.width = input->dims()[3]; - - convArgs.image.pad_height = param->Paddings()[0]; - convArgs.image.pad_width = param->Paddings()[1]; - convArgs.image.scale_address = input->scale; - convArgs.output.address = (void *)out_ptr; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = (uint32_t)param->Groups(); + convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0]; + convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1]; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_ptr; + convArgs.args[i].image.channels = (uint32_t)input->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input->dims()[3]; + convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0]; + convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1]; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp index 9a296244bf1de110b655ae4ebd3bde36cee5fe0d..0c25ca0e8c9acef2caeeb2f712efe9db3eb21012 100644 --- a/src/operators/kernel/fpga/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp @@ -25,8 +25,7 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { bool relu_enabled = false; Tensor *input = const_cast(param->Input()); auto input_ptr = input->data(); - Tensor *filter = param->Filter(); - + Tensor *filter = const_cast(param->Filter()); Tensor *out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); @@ -65,27 +64,42 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; - convArgs.filter_num = filter->dims()[0]; - convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_h = param->Strides()[0]; - convArgs.kernel.stride_w = param->Strides()[1]; - convArgs.kernel.height = filter->dims()[2]; - convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; - convArgs.image.channels = input->dims()[1]; - convArgs.image.height = input->dims()[2]; - convArgs.image.width = input->dims()[3]; - convArgs.image.pad_height = param->Paddings()[0]; - convArgs.image.pad_width = param->Paddings()[1]; - convArgs.image.scale_address = input->scale; - convArgs.output.address = (void *)out_ptr; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = (uint32_t)param->Groups(); + convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0]; + convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1]; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_ptr; + convArgs.args[i].image.channels = (uint32_t)input->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input->dims()[3]; + convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0]; + convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1]; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp index 68a9202f2b9a8a965277807bbdce48c3d07c2c43..2a493fc07cadeaca54bd2d8d37727a56017e9455 100644 --- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp @@ -24,7 +24,7 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { bool relu_enabled = true; Tensor *input = const_cast(param->Input()); auto input_ptr = input->data(); - Tensor *filter = param->Filter(); + Tensor *filter = const_cast(param->Filter()); Tensor *out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); @@ -61,26 +61,42 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; - convArgs.filter_num = filter->dims()[0]; - convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_h = param->Strides()[0]; - convArgs.kernel.stride_w = param->Strides()[1]; - convArgs.kernel.height = filter->dims()[2]; - convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; - convArgs.image.channels = input->dims()[1]; - convArgs.image.height = input->dims()[2]; - convArgs.image.width = input->dims()[3]; - convArgs.image.pad_height = param->Paddings()[0]; - convArgs.image.pad_width = param->Paddings()[1]; - convArgs.image.scale_address = input->scale; - convArgs.output.address = (void *)out_ptr; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = (uint32_t)param->Groups(); + convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0]; + convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1]; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_ptr; + convArgs.args[i].image.channels = (uint32_t)input->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input->dims()[3]; + convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0]; + convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1]; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp index 5323796080073dd97b1eb06a0a0c7d8e5d8d824e..c1a9710803a8ea9052d6e355aa57a6bdff380846 100644 --- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp @@ -22,9 +22,9 @@ template <> bool ElementwiseAddReluKernel::Init( ElementwiseAddReluParam *param) { bool relu_enabled = true; - Tensor *input_x = const_cast(param->InputX()); - Tensor *input_y = const_cast(param->InputY()); - Tensor *out = param->Out(); + auto *input_x = const_cast(param->InputX()); + auto *input_y = const_cast(param->InputY()); + auto *out = param->Out(); auto input_x_ptr = input_x->data(); auto input_y_ptr = input_y->data(); fpga::format_ofm(out); @@ -34,22 +34,22 @@ bool ElementwiseAddReluKernel::Init( ewaddArgs.relu_enabled = relu_enabled; ewaddArgs.const0 = 1; ewaddArgs.const1 = 1; - ewaddArgs.image0.address = (void *)input_x_ptr; - ewaddArgs.image0.channels = input_x->dims()[1]; + ewaddArgs.image0.address = input_x_ptr; + ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1]; ewaddArgs.image0.scale_address = input_x->scale; - ewaddArgs.image0.height = input_x->dims()[2]; - ewaddArgs.image0.width = input_x->dims()[3]; + ewaddArgs.image0.height = (uint32_t)input_x->dims()[2]; + ewaddArgs.image0.width = (uint32_t)input_x->dims()[3]; ewaddArgs.image0.pad_height = 0; ewaddArgs.image0.pad_width = 0; - ewaddArgs.image1.address = (void *)input_y_ptr; - ewaddArgs.image1.channels = input_y->dims()[1]; + ewaddArgs.image1.address = input_y_ptr; + ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1]; ewaddArgs.image1.scale_address = input_y->scale; - ewaddArgs.image1.height = input_y->dims()[2]; - ewaddArgs.image1.width = input_y->dims()[3]; + ewaddArgs.image1.height = (uint32_t)input_y->dims()[2]; + ewaddArgs.image1.width = (uint32_t)input_y->dims()[3]; ewaddArgs.image1.pad_height = 0; ewaddArgs.image1.pad_width = 0; ewaddArgs.output.scale_address = out->scale; - ewaddArgs.output.address = (void *)out_ptr; + ewaddArgs.output.address = out_ptr; param->SetFpgaArgs(ewaddArgs); return true; } diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp index 57db757d734bab9fceb1af2845936170b41d185c..6636096d1cc159701c7a2aa214251b1b4622fc41 100644 --- a/src/operators/kernel/fpga/fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -14,71 +14,82 @@ limitations under the License. */ #ifdef FUSION_FCRELU_OP #include "operators/kernel/fc_relu_kernel.h" -#include "fpga/api.h" - namespace paddle_mobile { namespace operators { template <> bool FusionFcReluKernel::Init(FusionFcReluParam *param) { bool relu_enabled = true; - Tensor *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); - Tensor *input_y = param->InputY(); + auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); Tensor *out = param->Out(); - - PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0], + PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], "Image channel should be equal to weight number"); - int channel = out->dims()[1]; - float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + int channel = (uint32_t)out->dims()[1]; + auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); for (int i = 0; i < channel; i++) { bs_ptr[i + channel] = 1; bs_ptr[i] = input_z_ptr[i]; } - int num = input_y->dims()[1]; - int chw = input_y->dims()[0]; + int num = (uint32_t)filter->dims()[1]; + int chw = (uint32_t)filter->dims()[0]; PADDLE_MOBILE_ENFORCE( chw == input_x->numel(), "Filter element num should be equal to IFM element num"); - int height = input_x->dims()[2]; - int width = input_x->dims()[3]; + int height = (uint32_t)input_x->dims()[2]; + int width = (uint32_t)input_x->dims()[3]; int filter_channel = chw / height / width; - input_y->Resize(framework::make_ddim({num, filter_channel, height, width})); - float max_value = fpga::filter_find_max(input_y); - fpga::format_filter(input_y, max_value, 1); - auto input_y_ptr = input_y->data(); + filter->Resize(framework::make_ddim({num, filter_channel, height, width})); + float max_value = fpga::filter_find_max(filter); + fpga::format_filter(filter, max_value, 1); + auto filter_ptr = filter->data(); - int element_num_per_div = fpga::get_element_num_per_div(input_y, 1); + int element_num_per_div = fpga::get_element_num_per_div(filter, 1); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)input_y_ptr; - convArgs.filter_num = out->dims()[1]; + fpga::WrapperConvArgs convArgs; convArgs.group_num = 1; - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_w = 1; - convArgs.kernel.stride_h = 1; - convArgs.kernel.height = input_x->dims()[2]; - convArgs.kernel.width = input_x->dims()[3]; - convArgs.image.address = (void *)input_x_ptr; - convArgs.image.channels = input_x->dims()[1]; - convArgs.image.height = input_x->dims()[2]; - convArgs.image.width = input_x->dims()[3]; - convArgs.image.pad_height = 0; - convArgs.image.pad_width = 0; - convArgs.image.scale_address = input_x->scale; - convArgs.output.address = (void *)out_ptr; + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = 1; + convArgs.args[i].kernel.stride_h = 1; + convArgs.args[i].kernel.stride_w = 1; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_x_ptr; + convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input_x->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input_x->dims()[3]; + convArgs.args[i].image.pad_height = 0; + convArgs.args[i].image.pad_width = 0; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } template <> diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index 3254b8cf5dd84ab339a373233278fae4101a15cf..15392dd146a8fed8b4f5edb946b00d6510dd3732 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -21,58 +21,76 @@ namespace operators { template <> bool FusionFcKernel::Init(FusionFcParam *param) { bool relu_enabled = false; - Tensor *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); - Tensor *input_y = param->InputY(); + auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); Tensor *out = param->Out(); - PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0], + PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], "Image channel should be equal to weight number"); - int channel = out->dims()[1]; - float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + int channel = (uint32_t)out->dims()[1]; + auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); for (int i = 0; i < channel; i++) { bs_ptr[i + channel] = 1; bs_ptr[i] = input_z_ptr[i]; } - - int num = input_y->dims()[1]; - int chw = input_y->dims()[0]; + int num = (uint32_t)filter->dims()[1]; + int chw = (uint32_t)filter->dims()[0]; PADDLE_MOBILE_ENFORCE( chw == input_x->numel(), "Filter element num should be equal to IFM element num"); - int height = input_x->dims()[2]; - int width = input_x->dims()[3]; + int height = (uint32_t)input_x->dims()[2]; + int width = (uint32_t)input_x->dims()[3]; int filter_channel = chw / height / width; - input_y->Resize(framework::make_ddim({num, filter_channel, height, width})); - float max_value = fpga::filter_find_max(input_y); - fpga::format_filter(input_y, max_value, 1); - auto input_y_ptr = input_y->data(); - int element_num_per_div = fpga::get_element_num_per_div(input_y, 1); + + filter->Resize(framework::make_ddim({num, filter_channel, height, width})); + float max_value = fpga::filter_find_max(filter); + fpga::format_filter(filter, max_value, 1); + auto filter_ptr = filter->data(); + + int element_num_per_div = fpga::get_element_num_per_div(filter, 1); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); + auto out_ptr = out->mutable_data(); - fpga::ConvArgs convArgs; - convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)input_y_ptr; - convArgs.filter_num = out->dims()[1]; + fpga::WrapperConvArgs convArgs; convArgs.group_num = 1; - convArgs.sb_address = (void *)bs_ptr; - convArgs.kernel.stride_w = 1; - convArgs.kernel.stride_h = 1; - convArgs.kernel.height = input_x->dims()[2]; - convArgs.kernel.width = input_x->dims()[3]; - convArgs.image.address = (void *)input_x_ptr; - convArgs.image.channels = input_x->dims()[1]; - convArgs.image.height = input_x->dims()[2]; - convArgs.image.width = input_x->dims()[3]; - convArgs.image.pad_height = 0; - convArgs.image.pad_width = 0; - convArgs.image.scale_address = input_x->scale; - convArgs.output.address = (void *)out_ptr; + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; convArgs.output.scale_address = out->scale; + convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num * + sizeof(fpga::ConvArgs)); param->SetFpgaArgs(convArgs); + + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + int n = convArgs.split_num; + for (int i = 0; i < n; i++) { + convArgs.args[i].relu_enabled = relu_enabled; + convArgs.args[i].group_num = 1; + convArgs.args[i].kernel.stride_h = 1; + convArgs.args[i].kernel.stride_w = 1; + convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2]; + convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3]; + convArgs.args[i].image.address = input_x_ptr; + convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1]; + convArgs.args[i].image.height = (uint32_t)input_x->dims()[2]; + convArgs.args[i].image.width = (uint32_t)input_x->dims()[3]; + convArgs.args[i].image.pad_height = 0; + convArgs.args[i].image.pad_width = 0; + convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; + convArgs.args[i].filter_num = + (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); + convArgs.args[i].image.scale_address = + (float *)fpga::fpga_malloc(2 * sizeof(float)); + } return true; } diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp index e8c40086c459998eab0e1997125dc9c64574c8f2..d3df951dbc340814d766f76e8720c3aaef2f3539 100644 --- a/src/operators/kernel/fpga/pool_kernel.cpp +++ b/src/operators/kernel/fpga/pool_kernel.cpp @@ -21,7 +21,7 @@ namespace operators { template <> bool PoolKernel::Init(PoolParam *param) { - Tensor *input = const_cast(param->Input()); + auto *input = const_cast(param->Input()); auto input_ptr = input->data(); Tensor *output = param->Output(); fpga::format_ofm(output); @@ -31,19 +31,19 @@ bool PoolKernel::Init(PoolParam *param) { vector paddings = param->Paddings(); fpga::PoolingArgs poolArgs; - poolArgs.image.address = (void *)input_ptr; - poolArgs.image.channels = input->dims()[1]; - poolArgs.image.height = input->dims()[2]; - poolArgs.image.width = input->dims()[3]; - poolArgs.image.pad_height = paddings[0]; - poolArgs.image.pad_width = paddings[1]; + poolArgs.image.address = input_ptr; + poolArgs.image.channels = (uint32_t)input->dims()[1]; + poolArgs.image.height = (uint32_t)input->dims()[2]; + poolArgs.image.width = (uint32_t)input->dims()[3]; + poolArgs.image.pad_height = (uint32_t)paddings[0]; + poolArgs.image.pad_width = (uint32_t)paddings[1]; poolArgs.image.scale_address = input->scale; poolArgs.output.address = output_ptr; poolArgs.output.scale_address = input->scale; - poolArgs.kernel.height = ksize[0]; - poolArgs.kernel.width = ksize[1]; - poolArgs.kernel.stride_h = strides[0]; - poolArgs.kernel.stride_w = strides[1]; + poolArgs.kernel.height = (uint32_t)ksize[0]; + poolArgs.kernel.width = (uint32_t)ksize[1]; + poolArgs.kernel.stride_h = (uint32_t)strides[0]; + poolArgs.kernel.stride_w = (uint32_t)strides[1]; param->SetFpgaArgs(poolArgs); return true; } diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index d8159acf1ca0420db8b26656571826be30538e80..20c86a5c73bc9c35b8f8fd430013bb97d269fb4a 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -33,8 +33,8 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { args.convert_type = fpga::DATA_FP16_TO_FP32; args.layout_type = fpga::LAYOUT_NO_CONVERT; args.image.address = (void *)(input_ptr); - args.image.height = input->dims()[0]; - args.image.width = input->dims()[1]; + args.image.height = (uint32_t)input->dims()[0]; + args.image.width = (uint32_t)input->dims()[1]; args.image.channels = 1; args.output.address = output_ptr; param->SetFpgaArgs(args); diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index e5dc220f4c50922c2918251720619de8b4df7b98..fd9fdda58a0d193729a40f1ff2a23f5d5cade948 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "operators/math/gemm.h" -#include +#include #include "common/log.h" #include "memory/t_malloc.h" #if __ARM_NEON diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 4f1fc897252de0578fca587da0d1529e604bfae8..551ec0100431e32a775442066dae4964d484832a 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -56,7 +56,7 @@ struct DtypeTensorTrait { template <> struct DtypeTensorTrait { // This is the type we obtained in variable. - typedef framework::LoDTensor gtype; + typedef framework::Tensor gtype; // This type will be the parent class type // or the same type. typedef framework::Tensor rtype; @@ -1232,11 +1232,7 @@ class FusionFcParam : public OpParam { } const GType *InputX() const { return input_x_; } -#ifdef PADDLE_MOBILE_FPGA - RType *InputY() const { return input_y_; } -#else const RType *InputY() const { return input_y_; } -#endif const RType *InputZ() const { return input_z_; } @@ -1259,11 +1255,11 @@ class FusionFcParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -1297,11 +1293,7 @@ class FusionConvAddParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -1326,11 +1318,11 @@ class FusionConvAddParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -1379,11 +1371,7 @@ class FusionConvAddPReluParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -1410,11 +1398,11 @@ class FusionConvAddPReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1461,11 +1449,7 @@ class FusionConvAddAddPReluParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -1496,11 +1480,11 @@ class FusionConvAddAddPReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1538,11 +1522,7 @@ class FusionConvAddBNReluParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -1598,11 +1578,11 @@ class FusionConvAddBNReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1648,11 +1628,7 @@ class FusionConvBNAddReluParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -1711,11 +1687,11 @@ class FusionConvBNAddReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1748,11 +1724,8 @@ class FusionConvBNParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif + RType *Output() const { return output_y_; } const vector &Strides() const { return strides_; } @@ -1805,11 +1778,11 @@ class FusionConvBNParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1847,11 +1820,8 @@ class FusionConvAddBNParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif + RType *Output() const { return output_y_; } const vector &Strides() const { return strides_; } @@ -1906,11 +1876,11 @@ class FusionConvAddBNParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -2027,11 +1997,7 @@ class FusionConvBNReluParam : public OpParam { const RType *Input() const { return input_; } -#ifdef PADDLE_MOBILE_FPGA - RType *Filter() const { return filter_; } -#else const RType *Filter() const { return filter_; } -#endif RType *Output() const { return output_; } @@ -2085,11 +2051,11 @@ class FusionConvBNReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::ConvArgs fpga_conv_args; + fpga::WrapperConvArgs fpga_conv_args; public: - const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } + const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } #endif }; #endif