NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line boundaries were rebuilt from the added /
removed markers and checked against every hunk header's line counts. Leading
whitespace is reconstructed and may not match the target files byte-for-byte;
apply with whitespace-insensitive matching if a hunk is rejected. One empty
trailing context line was re-inserted in the op_param.h and op.cmake hunks to
satisfy their counts; its exact position is an assumption.
NOTE(review): the mis-decoded `¶m` in MulKernel::Compute is restored to
`&param`. Template argument lists (on MulKernel, MulParam, const_cast,
std::vector, SetupTensor, static_cast) appear to have been stripped from the
recovered copy and are NOT restored here; confirm them against the upstream
commit before applying.

diff --git a/src/operators/kernel/fpga/mul_kernel.cpp b/src/operators/kernel/fpga/mul_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..07aa4bcc43d28805ab0660bf89149c5ec5f1c732
--- /dev/null
+++ b/src/operators/kernel/fpga/mul_kernel.cpp
@@ -0,0 +1,70 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef MUL_OP
+
+#include "operators/kernel/mul_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool MulKernel::Init(MulParam *param) {
+  bool relu_enabled = false;
+  auto input_x = const_cast(param->InputX());
+  auto filter = const_cast(param->InputY());
+  auto out = param->Out();
+
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
+                        "Image channel should be equal to weight number");
+  int channel = (uint32_t)out->dims()[1];
+  auto bs_ptr =
+      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
+  for (int i = 0; i < channel; i++) {
+    bs_ptr[i + channel] = 1;
+    bs_ptr[i] = 0;
+  }
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
+  PADDLE_MOBILE_ENFORCE(
+      chw == input_x->numel(),
+      "Filter element num should be equal to IFM element num");
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
+  int filter_channel = chw / height / width;
+
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_fc_filter(filter, max_value);
+
+  int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
+  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
+  fpga::format_fp16_ofm(out);
+
+  fpga::WrapperConvArgs conv_arg = {0};
+  fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
+                      0, bs_ptr);
+  param->SetFpgaArgs(conv_arg);
+  return true;
+}
+
+template <>
+void MulKernel::Compute(const MulParam &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp
index a6b055b62fa25fbca2a85dfa386fa406e207b2e9..69e3bb300d741e74ab8d6eea6c62052b4d0d8f1d 100644
--- a/src/operators/mul_op.cpp
+++ b/src/operators/mul_op.cpp
@@ -61,5 +61,7 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp);
 #ifdef PADDLE_MOBILE_MALI_GPU
 REGISTER_OPERATOR_MALI_GPU(mul, ops::MulOp);
 #endif
-
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(mul, ops::MulOp);
+#endif
 #endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index f1c9f09b65ec35960320ab7b2aca0bc2904cc838..f50c00457fee099a5fb26415b846de945a66d73e 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -438,6 +438,15 @@ class MulParam : OpParam {
   GType *out_;
   int x_num_col_dims_;
   int y_num_col_dims_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::WrapperConvArgs fpga_conv_args;
+
+ public:
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
 
diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp
index cca6793f10da5a0784cf8a3ba2d0104f3508028d..f850eb3e5ea3a03fe90d82c1eca2af6c9f8e9106 100644
--- a/test/fpga/test_resnet50.cpp
+++ b/test/fpga/test_resnet50.cpp
@@ -18,8 +18,9 @@ static const char *g_resnet_combine = "../models/resnet50";
 int main() {
   DLOG << paddle_mobile::fpga::open_device();
   paddle_mobile::PaddleMobile paddle_mobile;
-  if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
-                         std::string(g_resnet_combine) + "/params", true)) {
+  // if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
+  // std::string(g_resnet_combine) + "/params", true)) {
+  if (paddle_mobile.Load(std::string(g_resnet_combine), true)) {
     std::vector dims{1, 3, 224, 224};
     Tensor input_tensor;
     SetupTensor(&input_tensor, {1, 3, 224, 224}, static_cast(0),
diff --git a/tools/op.cmake b/tools/op.cmake
index 9a6ec0a147b564296d89113a2838cc6bd73975a1..898f66a634d70a5def7c7ce328a7a291d9b55c70 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -121,6 +121,7 @@ if (CON GREATER -1)
     set(FUSION_CONVBNRELU_OP ON)
     set(FUSION_CONVBN_OP ON)
     set(FUSION_CONVADD_OP ON)
+    set(MUL_OP ON)
     set(FOUND_MATCH ON)
 endif()
 