diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 089fec9aeaee198e6dbc0bf732b061fe014ed66b..74080e6b0541a068956f031f984eea9ac0160b2d 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -24,7 +24,7 @@ template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -61,14 +61,14 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
 
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
 
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
index 030dfcad9ac0a1cb93f7be626e59adbfc1630052..c5d3c7ae5da758e7ca8ae30bc9c9d7352c007260 100644
--- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -24,7 +24,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -58,14 +58,14 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
 
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
 
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
index 81fc42980c17acea0f051d9dc548fd5e4c602bd5..7c2e627ab2b64cec5a66b57f84ca6de448b4c37d 100644
--- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -23,7 +23,7 @@ template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -40,14 +40,14 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
 
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
 
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp
index 997c0a754404bcda6334e8f9d068243bfd7102b0..9a296244bf1de110b655ae4ebd3bde36cee5fe0d 100644
--- a/src/operators/kernel/fpga/conv_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp
@@ -24,7 +24,7 @@ template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *filter = param->Filter();
   Tensor *out = param->Output();
 
@@ -56,14 +56,14 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam *param) {
 
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
 
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
index e0865b4a7cfcf0422393ee56fb4f4d370eb9abcf..68a9202f2b9a8a965277807bbdce48c3d07c2c43 100644
--- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
@@ -23,7 +23,7 @@ template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -33,7 +33,6 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam *param) {
   const float epsilon = param->Epsilon();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
-
   const int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   Tensor *new_scale = new Tensor();
@@ -53,14 +52,14 @@
 
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
 
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index 573e15a80bda57ebf5217d4f47ead5d2d5165688..5323796080073dd97b1eb06a0a0c7d8e5d8d824e 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -25,10 +25,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
   Tensor *input_y = const_cast<Tensor *>(param->InputY());
   Tensor *out = param->Out();
-  auto input_x_ptr = input_x->data<float>();
-  auto input_y_ptr = input_y->data<float>();
+  auto input_x_ptr = input_x->data<half>();
+  auto input_y_ptr = input_y->data<half>();
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index 1065eea0160066cbe9f1efcc0131cc45f08378d6..57db757d734bab9fceb1af2845936170b41d185c 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -23,7 +23,7 @@ template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   bool relu_enabled = true;
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   Tensor *input_y = param->InputY();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
@@ -50,13 +50,13 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
   float max_value = fpga::filter_find_max(input_y);
   fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
+  auto input_y_ptr = input_y->data<int8_t>();
 
   int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 7a83fa65980a3055ea8b80caa8ca83caf750e8cc..3254b8cf5dd84ab339a373233278fae4101a15cf 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -22,7 +22,7 @@ template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   bool relu_enabled = false;
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   Tensor *input_y = param->InputY();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
@@ -45,17 +45,13 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   int height = input_x->dims()[2];
   int width = input_x->dims()[3];
   int filter_channel = chw / height / width;
-
   input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
   float max_value = fpga::filter_find_max(input_y);
   fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-
+  auto input_y_ptr = input_y->data<int8_t>();
   int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
-
-  fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 98c8da84d7875e1ffe5d6127b6bbc49ac907f923..e8c40086c459998eab0e1997125dc9c64574c8f2 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -22,10 +22,10 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *output = param->Output();
   fpga::format_ofm(output);
-  auto output_ptr = output->mutable_data<float>();
+  auto output_ptr = output->mutable_data<half>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp
index 1c609431310b1399e0427326d5237368e649c14c..d8159acf1ca0420db8b26656571826be30538e80 100644
--- a/src/operators/kernel/fpga/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
@@ -25,19 +25,19 @@ namespace operators {
 
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam *param) {
   const Tensor *input = param->InputX();
-  if (input->type() == typeid(half)) {
-    auto input_ptr = input->data<half>();
-    auto output_ptr = param->Out();
-    fpga::BypassArgs args;
-    args.convert_type = fpga::DATA_FP16_TO_FP32;
-    args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
-    args.image.address = (void *)(input_ptr);
-    args.image.height = input->dims()[0];
-    args.image.width = input->dims()[1];
-    args.image.channels = 1;
-    args.output.address = output_ptr;
-    param->SetFpgaArgs(args);
-  }
+
+  auto input_ptr = input->data<half>();
+  auto output = param->Out();
+  auto output_ptr = output->mutable_data<float>();
+  fpga::BypassArgs args;
+  args.convert_type = fpga::DATA_FP16_TO_FP32;
+  args.layout_type = fpga::LAYOUT_NO_CONVERT;
+  args.image.address = (void *)(input_ptr);
+  args.image.height = input->dims()[0];
+  args.image.width = input->dims()[1];
+  args.image.channels = 1;
+  args.output.address = output_ptr;
+  param->SetFpgaArgs(args);
   return true;
 }
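Note on the softmax change: Init() only records a fpga::BypassArgs descriptor via param->SetFpgaArgs(args); the kernel's Compute() must later hand that descriptor to the FPGA driver. A minimal sketch of that consumer, assuming paddle-mobile's fpga::PerformBypass entry point and a matching FpgaArgs() getter on the param (both are assumptions, not shown in this patch):

template <>
void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam &param) const {
  // The args built in Init() describe an FP16 -> FP32 bypass: the FPGA
  // copies the half-precision feature map into the float buffer allocated
  // by output->mutable_data<float>() above.
  fpga::PerformBypass(param.FpgaArgs());  // assumed driver entry point
  // A CPU softmax over the float output buffer would follow here.
}

Since the new code drops the typeid(half) guard, Init() now assumes the producer op always emits FP16 on FPGA, which is consistent with the data<half>()/mutable_data<half>() changes in the kernels above.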