diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp similarity index 58% rename from src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp rename to src/operators/kernel/fpga/V1/conv_add_kernel.cpp index 6c99750eb824940b32a857ee2baffc72bce05a7a..5ad4c86441f7870b00e6639e7cda22083d3c10d5 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp @@ -12,49 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef FUSION_CONVADDBNRELU_OP +#ifdef FUSION_CONVADD_OP -#include "operators/kernel/conv_add_bn_relu_kernel.h" +#include "operators/kernel/conv_add_kernel.h" namespace paddle_mobile { namespace operators { template <> -bool ConvAddBNReluKernel::Init( - FusionConvAddBNReluParam *param) { - bool relu_enabled = true; +bool ConvAddKernel::Init(FusionConvAddParam *param) { + bool relu_enabled = false; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - auto bn_mean_ptr = param->InputMean()->data(); - auto bn_var_ptr = param->InputVariance()->data(); - auto bn_scale_ptr = param->InputScale()->data(); - auto bn_bias_ptr = param->InputBias()->data(); - const float epsilon = param->Epsilon(); - PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] && - bias->dims()[0] == param->InputBias()->dims()[0], - "Output channel should be equal to bias number"); - const int channel = out->dims()[1]; + PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], + "Output channel should be equal to bias number"); + int channel = out->dims()[1]; auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT - auto new_scale = new Tensor(); - auto new_bias = new Tensor(); - 
auto new_scale_ptr = new_scale->mutable_data({channel}); - auto new_bias_ptr = new_bias->mutable_data({channel}); - for (int i = 0; i < channel; i++) { - new_scale_ptr[i] = bn_scale_ptr[i] / - static_cast(pow((bn_var_ptr[i] + epsilon), 0.5)); - new_bias_ptr[i] = - bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i]; - bs_ptr[i + 2] = new_scale_ptr[i]; - bs_ptr[i] = new_bias_ptr[i]; + bs_ptr[i + channel] = 1; + bs_ptr[i] = bias_ptr[i]; } - param->SetNewScale(new_scale); - param->SetNewBias(new_bias); float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, param->Groups()); @@ -75,8 +57,8 @@ bool ConvAddBNReluKernel::Init( } template <> -void ConvAddBNReluKernel::Compute( - const FusionConvAddBNReluParam &param) { +void ConvAddKernel::Compute( + const FusionConvAddParam &param) { fpga::ComputeFpgaConv(param.FpgaArgs()); } diff --git a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39d7e818976b56eaea8649392784e7b5dc8b7e1f --- /dev/null +++ b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_DECONVADD_OP + +#include "operators/kernel/deconv_add_kernel.h" +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { + return true; +} + +template <> +void DeconvAddKernel::Compute( + const FusionDeconvAddParam &param) {} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef2556208a8650a86522264f40f42cb596ec4190 --- /dev/null +++ b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_DECONVADDRELU_OP + +#include "operators/kernel/deconv_add_relu_kernel.h" +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DeconvAddReluKernel::Init( + FusionDeconvAddReluParam *param) { + return true; +} + +template <> +void DeconvAddReluKernel::Compute( + const FusionDeconvAddReluParam &param) {} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8eeb53159411276fbab957c676a01cb31b597c8 --- /dev/null +++ b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#ifdef ELEMENTWISEADD_OP + +#include "operators/kernel/elementwise_add_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { + bool relu_enabled = false; + auto *input_x = const_cast(param->InputX()); + auto *input_y = const_cast(param->InputY()); + auto *out = param->Out(); + auto input_x_ptr = input_x->data(); + auto input_y_ptr = input_y->data(); + fpga::format_fp16_ofm(out); + auto out_ptr = out->mutable_data(); + + fpga::EWAddArgs ewaddArgs = {0}; + ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.const0 = 0x3c00; // =1 + ewaddArgs.const1 = 0x3c00; // =1 + ewaddArgs.image0.address = input_x_ptr; + ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1]; + ewaddArgs.image0.scale_address = input_x->scale; + ewaddArgs.image0.height = (uint32_t)input_x->dims()[2]; + ewaddArgs.image0.width = (uint32_t)input_x->dims()[3]; + ewaddArgs.image0.pad_height = 0; + ewaddArgs.image0.pad_width = 0; + ewaddArgs.image1.address = input_y_ptr; + ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1]; + ewaddArgs.image1.scale_address = input_y->scale; + ewaddArgs.image1.height = (uint32_t)input_y->dims()[2]; + ewaddArgs.image1.width = (uint32_t)input_y->dims()[3]; + ewaddArgs.image1.pad_height = 0; + ewaddArgs.image1.pad_width = 0; + ewaddArgs.output.scale_address = out->scale; + ewaddArgs.output.address = out_ptr; + param->SetFpgaArgs(ewaddArgs); + return true; +} + +template <> +void ElementwiseAddKernel::Compute( + const ElementwiseAddParam &param) { + fpga::ComputeFpgaEWAdd(param.FpgaArgs()); +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/fc_relu_kernel.cpp b/src/operators/kernel/fpga/V1/fc_relu_kernel.cpp deleted file mode 100644 index 2c6b616689dca14474d1cbdc3769b438de1358e4..0000000000000000000000000000000000000000 --- a/src/operators/kernel/fpga/V1/fc_relu_kernel.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2018 
PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#ifdef FUSION_FCRELU_OP -#include "operators/kernel/fc_relu_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool FusionFcReluKernel::Init(FusionFcReluParam *param) { - bool relu_enabled = true; - auto input_x = const_cast(param->InputX()); - auto filter = const_cast(param->InputY()); - auto input_z = param->InputZ(); - auto input_z_ptr = input_z->data(); - auto out = param->Out(); - PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], - "Image channel should be equal to weight number"); - int channel = (uint32_t)out->dims()[1]; - auto bs_ptr = - (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT - for (int i = 0; i < channel; i++) { - bs_ptr[i + channel] = 1; - bs_ptr[i] = input_z_ptr[i]; - } - - int num = (uint32_t)filter->dims()[1]; - int chw = (uint32_t)filter->dims()[0]; - PADDLE_MOBILE_ENFORCE( - chw == input_x->numel(), - "Filter element num should be equal to IFM element num"); - int height = (uint32_t)input_x->dims()[2]; - int width = (uint32_t)input_x->dims()[3]; - int filter_channel = chw / height / width; - - out->Resize(framework::make_ddim({1, channel, 1, 1})); - filter->Resize(framework::make_ddim({num, filter_channel, height, width})); - float max_value = fpga::filter_find_max(filter); - fpga::format_fc_filter(filter, max_value); - - int element_num_per_div = fpga::get_filter_num_per_div(filter, 1); - 
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); - - fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, - 0, 0, bs_ptr); - param->SetFpgaArgs(conv_arg); - return true; -} -template <> -void FusionFcReluKernel::Compute( - const FusionFcReluParam &param) { - fpga::ComputeFpgaConv(param.FpgaArgs()); -} - -} // namespace operators -} // namespace paddle_mobile -#endif diff --git a/src/operators/kernel/fpga/V1/split_kernel.cpp b/src/operators/kernel/fpga/V1/split_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..faa1da9186d2a74961450925dea6e3d0f98856bc --- /dev/null +++ b/src/operators/kernel/fpga/V1/split_kernel.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef SPLIT_OP + +#include "operators/kernel/split_kernel.h" + +namespace paddle_mobile { +namespace operators { +template <> +bool SplitKernel::Init(SplitParam* param) { + return true; +} +template <> +void SplitKernel::Compute(const SplitParam& param) {} + +} // namespace operators +} // namespace paddle_mobile +#endif diff --git a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V1/tanh_kernel.cpp similarity index 75% rename from src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp rename to src/operators/kernel/fpga/V1/tanh_kernel.cpp index 3284ddcdece3ab7fcf4fb4458a59d39c452ad1ce..46dd3a0f6f8819f6485243a445725554943ab2bf 100644 --- a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp +++ b/src/operators/kernel/fpga/V1/tanh_kernel.cpp @@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef CONV_TRANSPOSE_OP +#ifdef TANH_OP -#include "operators/kernel/conv_transpose_kernel.h" +#include "operators/kernel/tanh_kernel.h" namespace paddle_mobile { namespace operators { template <> -bool ConvTransposeKernel::Init(ConvTransposeParam *param) { +bool TanhKernel::Init(TanhParam *param) { return true; } template <> -void ConvTransposeKernel::Compute( - const ConvTransposeParam &param) {} +void TanhKernel::Compute(const TanhParam &param) {} } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..585cc52947fa5de991fee446ba3c0098ae99d0af --- /dev/null +++ b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef TRANSPOSE2_OP + +#include "operators/kernel/transpose2_kernel.h" +#include "operators/kernel/central-arm-func/transpose2_arm_func.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool Transpose2Kernel::Init(Transpose2Param *param) { + return true; +} + +template <> +void Transpose2Kernel::Compute( + const Transpose2Param &param) { + // Transpose2Compute(param); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V2/fc_relu_kernel.cpp b/src/operators/kernel/fpga/V2/fc_relu_kernel.cpp deleted file mode 100644 index ba869aaca7f3f5d5c598feb3837a59a3a738493b..0000000000000000000000000000000000000000 --- a/src/operators/kernel/fpga/V2/fc_relu_kernel.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#ifdef FUSION_FCRELU_OP -#include "operators/kernel/fc_relu_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool FusionFcReluKernel::Init(FusionFcReluParam *param) { - bool relu_enabled = true; - auto input_x = const_cast(param->InputX()); - auto filter = const_cast(param->InputY()); - auto input_z = param->InputZ(); - auto input_z_ptr = input_z->data(); - auto out = param->Out(); - PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], - "Image channel should be equal to weight number"); - int channel = (uint32_t)out->dims()[1]; - auto bs_ptr = - (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT - for (int i = 0; i < channel; i++) { - bs_ptr[i + channel] = 1; - bs_ptr[i] = input_z_ptr[i]; - } - - int num = (uint32_t)filter->dims()[1]; - int chw = (uint32_t)filter->dims()[0]; - PADDLE_MOBILE_ENFORCE( - chw == input_x->numel(), - "Filter element num should be equal to IFM element num"); - int height = (uint32_t)input_x->dims()[2]; - int width = (uint32_t)input_x->dims()[3]; - int filter_channel = chw / height / width; - - out->Resize(framework::make_ddim({1, channel, 1, 1})); - filter->Resize(framework::make_ddim({num, filter_channel, height, width})); - fpga::format_fc_data(filter, out, bs_ptr); - - fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, - 0, 0, bs_ptr); - param->SetFpgaArgs(conv_arg); - return true; -} -template <> -void FusionFcReluKernel::Compute( - const FusionFcReluParam &param) { - fpga::ComputeFpgaConv(param.FpgaArgs()); -} - -} // namespace operators -} // namespace paddle_mobile -#endif diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 4d4878789ea5d86ff5f57e1ce14e101a8e9fd9bc..a4d29a0f3a83b2c8a6bb375477039f79ba5f5b7d 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -2211,7 +2211,6 @@ class DropoutParam : public OpParam { }; #endif -#ifdef CONV_TRANSPOSE_OP template class 
ConvTransposeParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; @@ -2266,7 +2265,7 @@ class ConvTransposeParam : public OpParam { void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; } #endif }; -#endif + #ifdef FUSION_DECONVADD_OP template class FusionDeconvAddParam : public ConvTransposeParam { diff --git a/tools/op.cmake b/tools/op.cmake index 3c70f1754fbdddd9594cb25731979f17137f66d4..3a4a0597a44694c4edea8173af47627cb5680df2 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -109,16 +109,19 @@ list(FIND NET "FPGA_NET_V1" CON) if (CON GREATER -1) message("FPGA_NET_V1 enabled") set(FUSION_CONVADDRELU_OP ON) - set(FUSION_CONVADDBNRELU_OP ON) - set(FUSION_CONVADDBN_OP ON) set(FUSION_ELEMENTWISEADDRELU_OP ON) set(FUSION_FC_OP ON) - set(FUSION_FCRELU_OP ON) set(POOL_OP ON) - set(CONCAT_OP ON) set(SOFTMAX_OP ON) set(FUSION_CONVBNRELU_OP ON) set(FUSION_CONVBN_OP ON) + set(TANH_OP ON) + set(ELEMENTWISEADD_OP ON) + set(TRANSPOSE2_OP ON) + set(FUSION_CONVADD_OP ON) + set(SPLIT_OP ON) + set(FUSION_DECONVADD_OP ON) + set(FUSION_DECONVADDRELU_OP ON) set(FOUND_MATCH ON) endif() @@ -132,7 +135,6 @@ if (CON GREATER -1) set(SOFTMAX_OP ON) set(FUSION_CONVBNRELU_OP ON) set(FUSION_CONVBN_OP ON) - set(CONV_TRANSPOSE_OP ON) set(TANH_OP ON) set(ELEMENTWISEADD_OP ON) set(TRANSPOSE2_OP ON) @@ -140,8 +142,6 @@ if (CON GREATER -1) set(SPLIT_OP ON) set(FUSION_DECONVADD_OP ON) set(FUSION_DECONVADDRELU_OP ON) - - set(FOUND_MATCH ON) endif()