diff --git a/src/operators/kernel/fpga/V1/softmax_kernel.cpp b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
index ff5ff5380ff33545bb52a8d0ef31f7e539edb46b..ac7a7bdc77c291864aad55ebb33495d8e1c57b50 100644
--- a/src/operators/kernel/fpga/V1/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
@@ -35,19 +35,23 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
   auto float_input = new LoDTensor;
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_n = 1, input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
   input->Resize(framework::make_ddim(dims));
   float_input->Resize(framework::make_ddim(dims));
-  if (channel == 2 && input->type() == type_id()) {  // Use FPGA
+  if (input_c == 2 && input->type() == type_id()) {  // Use FPGA
     fpga::format_fp16_ofm(out);
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -55,9 +59,9 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
@@ -67,8 +71,8 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     out->mutable_data(framework::make_ddim(dims));
     float_input->init(type_id().hash_code());
     float_input->mutable_data(framework::make_ddim(dims));
-    // fpga::format_fp32_ofm(float_input);
-    // fpga::format_fp32_ofm(out);
+    fpga::format_fp32_ofm(float_input);
+    fpga::format_fp32_ofm(out);
 
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -76,9 +80,9 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP32;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)dims[1] * dims[0];
-    args.image.width = (uint32_t)dims[2];
-    args.image.channels = (uint32_t)dims[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = float_input->data();
     args.output.scale_address = float_input->scale;
     param->SetFloatInput(float_input);
@@ -91,6 +95,23 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
 template <>
 void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   auto *in_x = (param.InputX());
+  auto dims = in_x->dims();
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if (in_x->type() == type_id()) {
     fpga::PerformBypass(param.FpgaArgs());
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
@@ -105,8 +126,7 @@ void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   } else {
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
       Tensor *out = param.Out();
-      out->Resize(
-          {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+      out->Resize({n, h, w, c});
       math::SoftmaxFuntor()(in_x, out);
     }
   }
diff --git a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
index 8bf1ead85c18bc23c51b528c3729aa702558f3ae..8c65ee0627f2810a198dabdcbca286725595d798 100644
--- a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
@@ -19,13 +19,10 @@ limitations under the License. */
 
 namespace paddle_mobile {
 namespace operators {
-
 template <>
 bool ConvAddBNReluKernel::Init(
     FusionConvAddBNReluParam *param) {
   bool relu_enabled = true;
-  // paddle_mobile::fpga::ActivationType activation_enable =
-  //     paddle_mobile::fpga::LEAKYRELU;
   auto input = const_cast(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data();
@@ -42,6 +39,7 @@ bool ConvAddBNReluKernel::Init(
   auto bn_scale_ptr = param->InputScale()->data();
   auto bn_bias_ptr = param->InputBias()->data();
   const float epsilon = param->Epsilon();
+
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
@@ -75,6 +73,7 @@ bool ConvAddBNReluKernel::Init(
                           new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
     fpga::fpga_free(bs_ptr);
+    delete new_scale;
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
@@ -82,9 +81,10 @@ bool ConvAddBNReluKernel::Init(
                          param->Groups(), strides[0], strides[1], paddings[0],
                          paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
+    delete new_scale;
+    delete new_bias;
   }
-  delete new_scale;
-  delete new_bias;
+
   return true;
 }
diff --git a/src/operators/kernel/fpga/V2/reshape2_kernel.cpp b/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
index b1df9372a6b3d7a39d6d5cd7ac9dd48534522f64..ebaf3759400c60c9ecf36467d0eeb7adad140f46 100644
--- a/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
@@ -114,6 +114,7 @@ void Reshape2Kernel::Compute(const Reshape2Param &param) {
     output->ShareDataWith(*input);
     framework::LoD lod = input->lod();
     output->set_lod(lod);
+    output->scale[0] = input->scale[0];
     return;
   }
 
diff --git a/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
index 2171432cba5700844ccd58fbb32ffcf23d3c132d..194fd5a30565b866ca702b296981d0b8302a1c16 100644
--- a/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
@@ -21,11 +21,12 @@ namespace operators {
 
 template <>
 bool SigmoidKernel::Init(SigmoidParam *param) {
-  paddle_mobile::fpga::ActivationType activation_enable =
-      paddle_mobile::fpga::SIGMOID;
-  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast(param->InputX());
   auto input_ptr = input->data();
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope =
+      fpga::fp32_2_fp16(input->scale[0] / 127.0);
   auto out = param->Out();
   fpga::format_ofm(out);
 
@@ -47,6 +48,7 @@ bool SigmoidKernel::Init(SigmoidParam *param) {
 template <>
 void SigmoidKernel::Compute(const SigmoidParam &param) {
   fpga::PerformBypass(param.FpgaArgs());
+  param.Out()->scale[0] = 127.0;
 }
 
 }  // namespace operators
diff --git a/src/operators/kernel/fpga/V2/softmax_kernel.cpp b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
index 4f75e0f30b2e9f57d94941e972d012016b55251e..b7615a8891b8292dd4d65c15955a0ee640c2f770 100755
--- a/src/operators/kernel/fpga/V2/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
@@ -28,17 +28,22 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
 
   auto out = param->Out();
   out->Resize(framework::make_ddim(dims));
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
+  input->Resize(framework::make_ddim(dims));
 
-  if ((channel == 2) && (input->type() == type_id())) {
+  if ((input_c == 2) && (input->type() == type_id())) {
     auto input_ptr = input->data();
     float Si = input->scale[0];
     int16_t slope = fpga::fp32_2_fp16(Si / 127);
@@ -50,22 +55,14 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
     args.output.activation.leaky_relu_negative_slope = slope;
     param->SetFpgaArgs(args);
-  } else if (input->type() == type_id()) {
-    auto float_input_x = param->float_input_x_;
-    float_input_x = std::make_shared();
-    float_input_x->Resize(input->dims());
-    float_input_x->init(type_id().hash_code());
-    fpga::format_ofm(float_input_x.get());
-    out->mutable_data(framework::make_ddim(dims));
-    fpga::format_ofm(out);
   } else {
     out->mutable_data(framework::make_ddim(dims));
     fpga::format_ofm(out);
@@ -78,36 +75,45 @@ template <>
 void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   auto *in_x = (param.InputX());
   auto dims = in_x->dims();
-  auto n = dims[0];
-  auto h = dims[1];
-  auto w = dims[2];
-  auto c = dims[3];
+
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if ((c == 2) && (in_x->type() == type_id())) {
     fpga::PerformBypass(param.FpgaArgs());
   } else if (in_x->type() == type_id()) {
     auto in_data = in_x->data();
     float Si = in_x->scale[0];
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
-
+    out->Resize({n, h, w, c});
     auto float_input_x = param.float_input_x_;
+    float_input_x = std::make_shared();
+    float_input_x->Resize(in_x->dims());
+    float_input_x->init(type_id().hash_code());
+    fpga::format_fp32_ofm(float_input_x.get());
     auto float_input_x_data = float_input_x->data();
     int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
     for (int i = 0; i < dataNum; i++) {
       float_input_x_data[i] = in_data[i] * Si / 127;
     }
     math::SoftmaxFuntor()(float_input_x.get(), out);
-    auto out_data = out->data();
-    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   } else {
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+    out->Resize({n, h, w, c});
     math::SoftmaxFuntor()(in_x, out);
-    int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
-    auto out_data = out->data();
-    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   }
 }
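For readers of this patch, the shape-normalization logic that the change repeats in the V1 and V2 softmax kernels can be summarized by the standalone sketch below. It is illustrative only: the helper name NormalizeSoftmaxDims and the plain std::vector<int64_t> signature are assumptions and not part of the patch, and the PADDLE_MOBILE_ENFORCE check is omitted. The branch structure mirrors what Init() and Compute() now do: a 4-D NHWC input takes H/W/C from dims[1..3], an FC-produced [N C 1 1] input is folded so that C = dims[1] and H = W = 1, and a 2-D [N C] input keeps only the channel count.

// Illustrative sketch only; NormalizeSoftmaxDims is a hypothetical helper
// that mirrors the dims handling added in the softmax kernels above.
#include <cstdint>
#include <vector>

struct HWC {
  int h = 1;
  int w = 1;
  int c = 1;
};

inline HWC NormalizeSoftmaxDims(const std::vector<int64_t> &dims) {
  HWC shape;
  if (dims.size() == 4) {
    // NHWC layout: height, width, channels come from dims[1..3].
    shape.h = static_cast<int>(dims[1]);
    shape.w = static_cast<int>(dims[2]);
    shape.c = static_cast<int>(dims[3]);
    if (shape.c == 1) {
      // Input produced by an FC op, dims = [N C 1 1]: fold it to a
      // channel-only shape, C = dims[1], H = W = 1.
      shape.c = static_cast<int>(dims[1]);
      shape.h = 1;
    }
  } else if (dims.size() == 2) {
    // 2-D [N C] input: only the channel count is meaningful.
    shape.c = static_cast<int>(dims[1]);
  }
  return shape;
}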