diff --git a/src/common/types.cpp b/src/common/types.cpp index 8b996fa5511a6d8e1b10b5a0aa13e820ee643c26..170d262e98348b406780633e02930bda5e97969a 100755 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -113,6 +113,7 @@ const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform"; const char *G_OP_TYPE_PAD2D = "pad2d"; const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU = "fusion_deconv_add_bn_relu"; const char *G_OP_TYPE_FUSION_DECONV_ADD_BN = "fusion_deconv_add_bn"; +const char *G_OP_TYPE_FUSION_DECONV_BN_RELU = "fusion_deconv_bn_relu"; std::unordered_map< std::string, std::pair, std::vector>> @@ -215,5 +216,6 @@ std::unordered_map< {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}, {G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU, {{"Input"}, {"Out"}}}, {G_OP_TYPE_FUSION_DECONV_ADD_BN, {{"Input"}, {"Out"}}}, + {G_OP_TYPE_FUSION_DECONV_BN_RELU, {{"Input"}, {"Out"}}}, {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/src/common/types.h b/src/common/types.h index 12f5253a74043a8609004520d68f1137c387f37d..45e86500abc8babae37391c78443e5c71ad1d43e 100755 --- a/src/common/types.h +++ b/src/common/types.h @@ -202,6 +202,7 @@ extern const char *G_OP_TYPE_ROI_PERSPECTIVE; extern const char *G_OP_TYPE_PAD2D; extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; +extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/src/operators/fusion_deconv_bn_relu_op.cpp b/src/operators/fusion_deconv_bn_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..22f549d1fcd501c420d3fb3c209c4dbb1273f7a8 --- /dev/null +++ b/src/operators/fusion_deconv_bn_relu_op.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVBNRELU_OP + +#include "operators/fusion_deconv_bn_relu_op.h" + +namespace paddle_mobile { +namespace operators {} +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +REGISTER_FUSION_MATCHER(fusion_deconv_bn_relu, ops::FusionDeconvBNReluMatcher); +#ifdef PADDLE_MOBILE_CPU +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_deconv_bn_relu, ops::FusionDeconvBNReluOp); +#endif + +#endif diff --git a/src/operators/fusion_deconv_bn_relu_op.h b/src/operators/fusion_deconv_bn_relu_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ad0920ebd69b1a13ebc0e85f2c5f6008379715da --- /dev/null +++ b/src/operators/fusion_deconv_bn_relu_op.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_DECONVBNRELU_OP +#pragma once +#include +#include + +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "operators/kernel/deconv_bn_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusionDeconvBNReluMatcher : public framework::FusionOpMatcher { + public: + FusionDeconvBNReluMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE); + node_ > std::make_shared(G_OP_TYPE_BATCHNORM) > + std::make_shared(G_OP_TYPE_RELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_BATCHNORM, + {{"Scale", "Scale"}, + {"Mean", "Mean"}, + {"Bias", "Bias"}, + {"Variance", "Variance"}}}}, + removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_DECONV_BN_RELU; } +}; + +template +class FusionDeconvBNReluOp + : public framework::OperatorWithKernel< + DeviceType, FusionDeconvBNReluParam, + operators::DeconvBNReluKernel> { + public: + FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionDeconvBNReluParam, + operators::DeconvBNReluKernel>(type, inputs, outputs, + attrs, scope) {} + + void InferShape() const { + auto input = this->param_.Input(); + auto in_dims = input->dims(); + + auto filter = this->param_.Filter(); + auto filter_dims = filter->dims(); + + std::vector strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + std::vector dilations = this->param_.Dilations(); + + int groups = this->param_.Groups(); + + PADDLE_MOBILE_ENFORCE( + in_dims.size() == 4 || in_dims.size() == 5, + "ConvTransposeOp intput should be 4-D or 5-D tensor."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() == filter_dims.size(), + "ConvTransposeOp input dimension and filter dimension " + "should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() - strides.size() == 2U, + "ConvTransposeOp input dimension and strides dimension should " + "be consistent."); + PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(), + "ConvTransposeOp paddings dimension and strides " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(), + "ConvTransposeOp paddings dimension and dilations " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims[1] == filter_dims[0], + "In ConvTransposeOp, The number of input channels should " + "be equal to the number of filter's channels."); + + std::vector output_shape({in_dims[0], filter_dims[1] * groups}); + for (size_t i = 0; i < strides.size(); ++i) { + auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1; + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - + 2 * paddings[i] + filter_extent); + } + this->param_.Output()->Resize(framework::make_ddim(output_shape)); + } + + protected: +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif // FUSION_DECONV_BN_RELU_OP diff --git a/src/operators/kernel/deconv_bn_relu_kernel.h b/src/operators/kernel/deconv_bn_relu_kernel.h new file mode 100755 index 0000000000000000000000000000000000000000..4ab0257b07e53149ff88c6a6ecca2dc77c0eb634 --- /dev/null +++ b/src/operators/kernel/deconv_bn_relu_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVBNRELU_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using framework::OpKernelBase; + +template +class DeconvBNReluKernel + : public OpKernelBase> { + public: + void Compute(const FusionDeconvBNReluParam ¶m); + + bool Init(FusionDeconvBNReluParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/conv_kernel.cpp b/src/operators/kernel/fpga/V1/conv_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..73722820bd90b54abd64dd01b157c74c6a1069e8 --- /dev/null +++ b/src/operators/kernel/fpga/V1/conv_kernel.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef CONV_OP + +#include "operators/kernel/conv_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvKernel::Init(ConvParam *param) { + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; + auto input = const_cast(param->Input()); + auto filter = const_cast(param->Filter()); + auto out = param->Output(); + int channel = out->dims()[1]; + auto bs_ptr = + (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT + for (int i = 0; i < channel; i++) { + bs_ptr[i + channel] = 1; + bs_ptr[i] = 0; + } + + fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(conv_arg); + return true; +} + +template <> +void ConvKernel::Compute(const ConvParam ¶m) { + fpga::ComputeFpgaConv(param.FpgaArgs()); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f166587109e5f63e30203a940aa3baa8ae87f844 --- /dev/null +++ b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp @@ -0,0 +1,108 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVBNRELU_OP + +#include "operators/kernel/deconv_bn_relu_kernel.h" +#include +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DeconvBNReluKernel::Init( + FusionDeconvBNReluParam *param) { + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; + auto input = const_cast(param->Input()); + const Tensor *bias = param->InputBias(); + auto bias_ptr = bias->data(); + auto filter = const_cast(param->Filter()); + auto out = param->Output(); + auto bn_mean_ptr = param->InputMean()->data(); + auto bn_var_ptr = param->InputVariance()->data(); + auto bn_scale_ptr = param->InputScale()->data(); + auto bn_bias_ptr = param->InputBias()->data(); + const float epsilon = param->Epsilon(); + + PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], + "Output channel should be equal to bias number"); + int channel = out->dims()[1]; + auto new_scale = new Tensor(); + auto new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({channel}); + auto new_bias_ptr = new_bias->mutable_data({channel}); + for (int i = 0; i < channel; i++) { + new_scale_ptr[i] = bn_scale_ptr[i] / + static_cast(pow((bn_var_ptr[i] + epsilon), 0.5)); + new_bias_ptr[i] = bn_bias_ptr[i] + (0 - bn_mean_ptr[i]) * new_scale_ptr[i]; + } + + int sub_conv_n = param->Strides()[0]; + auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n * // NOLINT + sizeof(float)); // NOLINT + + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel]; + bs_ptr[i] = new_bias_ptr[i % (channel)]; + } + + PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0], + "stride_width should be equal to stride_height "); + PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3], + "filter width should be equal to filter height "); + PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), + "filter axis should be the multiple of stride axis "); + if (param->Groups() == channel) { + fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), + sub_conv_n); + fpga::DWDeconvArgs DWDeconv_arg = {0}; + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(DWDeconv_arg); + } else { + fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); + fpga::DeconvArgs deconv_arg = {0}; + fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(deconv_arg); + } + delete new_scale; + delete new_bias; + return true; +} + +template <> +void DeconvBNReluKernel::Compute( + const FusionDeconvBNReluParam ¶m) { + // fpga::ComputeFpgaDeconv(param.FpgaArgs()); + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWDeconv(param.FpgaDWDconvArgs()); + } else { + fpga::ComputeFpgaDeconv(param.FpgaArgs()); + } +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp index 99af21a1ceebcaa82e322bc2c11c16a9723614be..056e18b29fbe8a85000983289f0e36b5c6d78357 100644 --- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp @@ -46,24 +46,39 @@ bool FetchKernel::Init(FetchParam *param) { return true; } - +void dealign(float *src, float *dst, int input_c, int input_h, int input_w) { + int alignCW = paddle_mobile::fpga::align_to_x(input_c * input_w, 16); + int dealignCW = input_c * input_w; + for (int h = 0; h < input_h; ++h) { + auto input_offset = h * alignCW; + auto output_offset = h * dealignCW; + memcpy((dst + output_offset), (src + input_offset), + dealignCW * sizeof(float)); + } +} template <> void FetchKernel::Compute(const FetchParam ¶m) { - auto input = const_cast(param.InputX()); + auto input = param.InputX(); if (input->type() == typeid(float)) { auto output = param.Out(); output->ShareDataWith(*input); return; } - fpga::BypassArgs args = param.fpga_bypass_args; - auto input_address = (input->data()); - args.image.address = static_cast(input_address); - - fpga::PerformBypass(args); + fpga::PerformBypass(param.fpga_bypass_args); + auto outC = param.Out()->dims()[1]; + auto outH = param.Out()->dims()[2]; + auto outW = param.Out()->dims()[3]; fpga::fpga_invalidate(param.fpga_bypass_args.output.address, - param.fpga_bypass_args.image.channels * sizeof(float)); + outH * + (paddle_mobile::fpga::align_to_x(outC * outW, 16)) * + sizeof(float)); - // TODO(zhangyang): DEalign: get rid of extra 0 + float *outdata_ptr = + reinterpret_cast(param.fpga_bypass_args.output.address); + float *data_tmp = + reinterpret_cast(malloc(outC * outH * outW * sizeof(float))); + dealign(outdata_ptr, data_tmp, outC, outH, outW); + memcpy(outdata_ptr, data_tmp, outC * outH * outW * sizeof(float)); } template class FetchKernel; diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 5683138ef1341a42c69fca33dc892a01e79736e4..134a2159297a2401d3554f021ccfb02147cc7b40 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -2473,6 +2473,62 @@ class FusionDeconvAddBNParam : public ConvTransposeParam { RType *new_scale_; }; #endif +#ifdef FUSION_DECONVBNRELU_OP +template +class FusionDeconvBNReluParam : public ConvTransposeParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + FusionDeconvBNReluParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, const Scope &scope) + : ConvTransposeParam(inputs, outputs, attrs, scope) { + output_ = OpParam::OutFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + } + RType *Output() const { return output_; } + + const RType *InputBias() const { return input_bias_; } + + const RType *InputMean() const { return input_mean_; } + + const RType *InputScale() const { return input_scale_; } + + const RType *InputVariance() const { return input_variance_; } + + const float &Epsilon() const { return epsilon_; } + + const float &Momentum() const { return momentum_; } + + const bool &IsTest() const { return is_test_; } + + void SetNewScale(RType *new_scale) { new_scale_ = new_scale; } + + void SetNewBias(RType *new_bias) { new_bias_ = new_bias; } + + const RType *NewScale() const { return new_scale_; } + + const RType *NewBias() const { return new_bias_; } + + protected: + RType *output_; + RType *input_bias_; + RType *input_mean_; + RType *input_scale_; + RType *input_variance_; + float epsilon_; + float momentum_; + bool is_test_; + RType *new_bias_; + RType *new_scale_; +}; +#endif #ifdef FUSION_DECONVADDBNRELU_OP template class FusionDeconvAddBNReluParam : public ConvTransposeParam {