diff --git a/src/operators/kernel/fpga/V1/tanh_kernel.cpp b/src/operators/kernel/fpga/V1/tanh_kernel.cpp
index 46dd3a0f6f8819f6485243a445725554943ab2bf..216cb726e3fe93e9ebfaf328a9ab4ca0725b6bb1 100644
--- a/src/operators/kernel/fpga/V1/tanh_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/tanh_kernel.cpp
@@ -15,17 +15,61 @@ limitations under the License. */
 #ifdef TANH_OP
 
 #include "operators/kernel/tanh_kernel.h"
-
+#include <math.h>
 namespace paddle_mobile {
 namespace operators {
 
 template <>
 bool TanhKernel<FPGA, float>::Init(TanhParam<FPGA> *param) {
+  auto input = const_cast<Tensor *>(param->InputX());
+  auto input_ptr = input->data<float>();
+  auto float_input = new Tensor;
+  float_input->mutable_data<float>(
+      {1, input->dims()[1], input->dims()[2], input->dims()[3]});
+  fpga::format_fp32_ofm(float_input);
+
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+  args.input_layout_type = fpga::LAYOUT_HWC;
+  args.output_layout_type = fpga::LAYOUT_CHW;
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.image.address = input_ptr;
+  args.image.height = (uint32_t)input->dims()[2];
+  args.image.width = (uint32_t)input->dims()[3];
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.output.address = float_input->data<float>();
+  args.output.scale_address = float_input->scale;
+  param->SetFloatInput(float_input);
+  param->SetFpgaArgs(args);
   return true;
 }
 
+#define EXP_MAX_INPUT 40.0
+template <typename T>
+T Tanh(const T a) {
+  T tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  return (2.0 / (1.0 + exp(tmp))) - 1.0;
+}
+template <typename T>
+void tanhFuntor(Tensor *input, Tensor *output) {
+  auto *input_ptr = input->data<T>();
+  auto *output_ptr = output->mutable_data<T>();
+  for (int i = 0; i < input->numel(); i++) {
+    *(output_ptr + i) = Tanh<T>(*(input_ptr + i));
+  }
+}
 template <>
-void TanhKernel<FPGA, float>::Compute(const TanhParam<FPGA> &param) {}
+void TanhKernel<FPGA, float>::Compute(const TanhParam<FPGA> &param) {
+  Tensor *in_x = param.FloatInput();
+  Tensor *out = param.Out();
+
+  fpga::PerformBypass(param.FpgaArgs());
+  fpga::fpga_invalidate((void *)in_x->data<float>(),
+                        in_x->numel() * sizeof(float));
+  tanhFuntor<float>(in_x, out);
+  fpga::fpga_flush(out->data<float>(), out->memory_size());
+}
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index fe5cce379d199be5d3931308513823c7279c21ff..24af798a84c89ba6434178d5c4392a03c0ba5d87 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1554,6 +1554,20 @@ class TanhParam : public OpParam {
  private:
   RType *input_x_;
   RType *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  RType *FloatInput() const {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 #endif
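
Note on the Tanh<T> helper added above: it evaluates tanh through the identity tanh(a) = 2 / (1 + exp(-2a)) - 1 and clamps the exponent argument at EXP_MAX_INPUT (40.0) so that exp() cannot overflow for large negative a. Below is a minimal host-side sketch checking the clamped form against std::tanh; it is not part of the patch, uses only the standard library, and the name ClampedTanh is hypothetical.

#include <cmath>
#include <cstdio>

#define EXP_MAX_INPUT 40.0

// Stand-alone copy of the patch's clamped-identity tanh:
// tanh(a) = 2 / (1 + exp(-2a)) - 1, with the exponent argument capped
// so exp() never overflows when a is strongly negative.
template <typename T>
T ClampedTanh(const T a) {
  T tmp = -2.0 * a;
  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
  return (2.0 / (1.0 + std::exp(tmp))) - 1.0;
}

int main() {
  // tanh saturates to +/-1 well before |a| = 20, so the clamp only
  // changes results where the unclamped exp(-2a) would overflow anyway.
  const double samples[] = {-30.0, -1.0, 0.0, 0.5, 30.0};
  for (double a : samples) {
    std::printf("a = %6.2f  clamped = %+.6f  std::tanh = %+.6f\n", a,
                ClampedTanh(a), std::tanh(a));
  }
  return 0;
}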
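
The op_param.h change follows a lazily-set shadow-tensor pattern: Init() allocates an FP32 copy of the FP16 input and hands it over via SetFloatInput(), and FloatInput() falls back to the raw input whenever no copy was set. A stripped-down sketch of just that ownership pattern follows; the Tensor stand-in and the ParamSketch name are hypothetical, and the real class additionally carries the fpga::BypassArgs shown in the diff.

#include <cstdio>
#include <memory>

// Hypothetical minimal stand-in for paddle_mobile's Tensor.
struct Tensor {
  const char *tag;
};

// Sketch of the TanhParam FPGA extension: the param does not own the raw
// input, but takes shared ownership of the FP32 shadow set during Init().
class ParamSketch {
 public:
  explicit ParamSketch(Tensor *input_x) : input_x_(input_x) {}

  // Prefer the converted FP32 tensor; fall back to the raw input.
  Tensor *FloatInput() const {
    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
  }
  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }

 private:
  Tensor *input_x_;                        // not owned (FP16, FPGA layout)
  std::shared_ptr<Tensor> float_input_x_;  // owned FP32 copy, set in Init()
};

int main() {
  Tensor raw{"fp16-input"};
  ParamSketch param(&raw);
  std::printf("%s\n", param.FloatInput()->tag);  // fallback: fp16-input
  param.SetFloatInput(new Tensor{"fp32-shadow"});
  std::printf("%s\n", param.FloatInput()->tag);  // shadow: fp32-shadow
  return 0;
}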