diff --git a/src/common/types.cpp b/src/common/types.cpp old mode 100644 new mode 100755 index 93e3ee516a59a1615b738793d06f3c35557243dc..8b996fa5511a6d8e1b10b5a0aa13e820ee643c26 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -105,12 +105,14 @@ const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu"; const char *G_OP_TYPE_SEQUENCE_EXPAND = "sequence_expand"; const char *G_OP_TYPE_SEQUENCE_POOL = "sequence_pool"; const char *G_OP_TYPE_SEQUENCE_SOFTMAX = "sequence_softmax"; - const char *G_OP_TYPE_SLICE = "slice"; const char *G_OP_TYPE_ANCHOR_GENERATOR = "anchor_generator"; const char *G_OP_TYPE_GENERATE_PROPOSALS = "generate_proposals"; const char *G_OP_TYPE_PSROI_POOL = "psroi_pool"; const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform"; +const char *G_OP_TYPE_PAD2D = "pad2d"; +const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU = "fusion_deconv_add_bn_relu"; +const char *G_OP_TYPE_FUSION_DECONV_ADD_BN = "fusion_deconv_add_bn"; std::unordered_map< std::string, std::pair, std::vector>> @@ -210,5 +212,8 @@ std::unordered_map< {{"Scores", "BboxDeltas", "ImInfo", "Anchors", "Variances"}, {"RpnRois", "RpnRoiProbs"}}}, {G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}}, - {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}}; + {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}, + {G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU, {{"Input"}, {"Out"}}}, + {G_OP_TYPE_FUSION_DECONV_ADD_BN, {{"Input"}, {"Out"}}}, + {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/src/common/types.h b/src/common/types.h old mode 100644 new mode 100755 index 9c189d5921546ebaaf3d058a47858157864e13ae..12f5253a74043a8609004520d68f1137c387f37d --- a/src/common/types.h +++ b/src/common/types.h @@ -199,6 +199,9 @@ extern const char *G_OP_TYPE_ANCHOR_GENERATOR; extern const char *G_OP_TYPE_GENERATE_PROPOSALS; extern const char *G_OP_TYPE_PSROI_POOL; extern const char *G_OP_TYPE_ROI_PERSPECTIVE; +extern const char *G_OP_TYPE_PAD2D; +extern const 
char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; +extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index 570ab6a19895a64d024abe24cffbc6fe00fcc596..9607961c4785f631afb4b5e207ebff2c8e33623e 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -30,9 +30,9 @@ void format_image(framework::Tensor *image_tensor) { auto data_ptr = image_tensor->data(); auto external_ptr = reinterpret_cast(image_tensor->external_data); float *p_data = external_ptr == nullptr ? data_ptr : external_ptr; - + float *old_p = p_data; image::format_image(&p_data, channel, height, width); - if (p_data != data_ptr) { + if (old_p != p_data) { image_tensor->reset_data_ptr(p_data); } } @@ -48,9 +48,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) { auto dims = ofm_tensor->dims(); size_t memory_size = 0; if (dims.size() == 4) { - auto channel = dims[1], height = dims[2], width = dims[3], num = dims[0]; - memory_size = num * height * align_to_x(channel * width, IMAGE_ALIGNMENT) * - sizeof(half); + auto channel = dims[1], height = dims[2], width = dims[3]; + memory_size = + height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half); } else if (dims.size() == 2) { memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half); } else { @@ -162,7 +162,7 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) { fpga_copy(new_data, data_ptr, memory_size); filter::format_dwconv_filter(&new_data, num, height, width, scale_ptr); filter_tensor->reset_data_ptr(new_data); - filter_tensor->set_type(typeid(int8_t)); + filter_tensor->set_type(typeid(int16_t)); } void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, @@ -396,8 +396,8 @@ void expand_conv_arg(ConvArgs *arg) { // auto cmd = 0UL | (args.relu_enabled ? 
USE_RELU : 0) | USE_BIAS; auto cmd = 0UL | USE_BIAS; - auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) | - ((args.deconv_tx_param.sub_conv_num) << 16) | + auto deconv_param = ((args.deconv_tx_param.deconv_en) << 16) | + ((args.deconv_tx_param.sub_conv_num) << 8) | ((args.deconv_tx_param.omit_size) << 0); (*arg).driver.image_address_phy = vaddr_to_paddr(args.image.address); (*arg).driver.sb_address_phy = vaddr_to_paddr(args.sb_address); @@ -623,7 +623,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, fpga::format_fp16_ofm(out, dims_out_new); auto out_ptr = out->data(); arg->output.address = - out_ptr + + (half *)out_ptr + // NOLINT omit_size * sizeof(half) * (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT)); arg->output.scale_address = out->scale; @@ -713,6 +713,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, } for (int j = 0; j < split_num; ++j) { + // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type = activation_enable; arg->split_conv_args[i] @@ -758,9 +759,9 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, align_to_x(arg->split_conv_args[i]->conv_arg[j].filter_num, FILTER_NUM_ALIGNMENT) * sizeof(int8_t); - auto filter_head = - &filter_ptr[j * element_num * filter_num_per_div + // NOLINT - i * filter_sub_conv_offset]; + auto filter_head = &(( + int8_t *)filter_ptr)[j * element_num * filter_num_per_div + // NOLINT + i * filter_sub_conv_offset]; arg->split_conv_args[i]->conv_arg[j].filter_address = fpga_malloc(filter_size); arg->split_conv_args[i]->vector_conv_space.push_back( @@ -774,6 +775,19 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, fpga_flush(arg->split_conv_args[i]->conv_arg[j].filter_address, filter_size); + /*{ + static int cnt = 0; + std::string str = "deconv_filter"; + if(cnt <= 1){ + cnt++; + str += std::to_string(cnt); + int8_t 
result = 0; + fpga::savefile(str, + arg->split_conv_args[i]->conv_arg[j].filter_address, filter_size, result); + } + + }*/ + size_t bs_align_num = align_to_x( arg->split_conv_args[i]->conv_arg[j].filter_num, BS_NUM_ALIGNMENT); size_t bs_size = 2 * bs_align_num * sizeof(float); @@ -789,6 +803,20 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, memcpy(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_head, bs_size); fpga_flush(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_size); + /* { + static int cnt = 0; + std::string str = "deconv_sb"; + if(cnt <= 1){ + cnt++; + str += std::to_string(cnt); + float result = 0; + fpga::savefile(str, + arg->split_conv_args[i]->conv_arg[j].sb_address, 2 * bs_align_num, + result); + } + + }*/ + if (split_num == 1) { arg->split_conv_args[i]->conv_arg[j].output.address = arg->split_conv_args[i]->output.address; @@ -835,13 +863,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, int16_t leaky_relu_negative_slope, int stride_h, int stride_w, int padding_h, int padding_w, float *bias_ptr) { - auto deleter = [](void *p) { fpga_free(p); }; - arg->vector_dwconv_space.push_back( - std::shared_ptr(reinterpret_cast(bias_ptr), deleter)); - - auto filter_ptr = filter->data(); + auto filter_ptr = filter->data(); auto input_ptr = input->data(); - auto output_ptr = out->mutable_data(); + auto output_ptr = out->data(); + arg->sub_conv_num = 1; // arg->relu_enabled = relu_enabled; arg->output.activation.activation_type = activation_enable; @@ -960,10 +985,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input, sizeof(int16_t)); arg->dw_conv_args[i]->output.scale_address = static_cast(fpga_malloc(2 * sizeof(float))); - arg->vector_dw_conv_space.push_back(std::shared_ptr( // NOLINT + arg->vector_dw_conv_space.push_back(std::shared_ptr( reinterpret_cast(arg->dw_conv_args[i]->output.address), deleter)); - arg->vector_dw_conv_space.push_back(std::shared_ptr( // NOLINT + 
arg->vector_dw_conv_space.push_back(std::shared_ptr( reinterpret_cast(arg->dw_conv_args[i]->output.scale_address), deleter)); } diff --git a/src/operators/fusion_deconv_add_bn_op.cpp b/src/operators/fusion_deconv_add_bn_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cb22e29f0903259d7bcf46271fb2a8bd70ba8eb7 --- /dev/null +++ b/src/operators/fusion_deconv_add_bn_op.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVADDBN_OP + +#include "operators/fusion_deconv_add_bn_op.h" + +namespace paddle_mobile { +namespace operators {} +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher); +#ifdef PADDLE_MOBILE_CPU +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp); +#endif + +#endif diff --git a/src/operators/fusion_deconv_add_bn_op.h b/src/operators/fusion_deconv_add_bn_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f7f9b9e2094a7228c944b70b88ae3105ae9f37e8 --- /dev/null +++ b/src/operators/fusion_deconv_add_bn_op.h @@ -0,0 +1,116 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_DECONVADDBN_OP +#pragma once +#include +#include + +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "operators/kernel/deconv_add_bn_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusionDeconvAddBNMatcher : public framework::FusionOpMatcher { + public: + FusionDeconvAddBNMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_BATCHNORM); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}}, + {G_OP_TYPE_BATCHNORM, + {{"Scale", "Scale"}, + {"Mean", "Mean"}, + {"Bias", "Bias"}, + {"Variance", "Variance"}, + {"Y", "BNY"}}}}, + removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN; } +}; + +template +class FusionDeconvAddBNOp : public framework::OperatorWithKernel< + DeviceType, FusionDeconvAddBNParam, + operators::DeconvAddBNKernel> { + public: + FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionDeconvAddBNParam, + operators::DeconvAddBNKernel>(type, inputs, outputs, + attrs, scope) {} + + void InferShape() const { + auto input = this->param_.Input(); + auto in_dims = input->dims(); + + auto filter = 
this->param_.Filter(); + auto filter_dims = filter->dims(); + + std::vector strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + std::vector dilations = this->param_.Dilations(); + + int groups = this->param_.Groups(); + + PADDLE_MOBILE_ENFORCE( + in_dims.size() == 4 || in_dims.size() == 5, + "ConvTransposeOp intput should be 4-D or 5-D tensor."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() == filter_dims.size(), + "ConvTransposeOp input dimension and filter dimension " + "should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() - strides.size() == 2U, + "ConvTransposeOp input dimension and strides dimension should " + "be consistent."); + PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(), + "ConvTransposeOp paddings dimension and strides " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(), + "ConvTransposeOp paddings dimension and dilations " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims[1] == filter_dims[0], + "In ConvTransposeOp, The number of input channels should " + "be equal to the number of filter's channels."); + + std::vector output_shape({in_dims[0], filter_dims[1] * groups}); + for (size_t i = 0; i < strides.size(); ++i) { + auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1; + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - + 2 * paddings[i] + filter_extent); + } + this->param_.Output()->Resize(framework::make_ddim(output_shape)); + } + + protected: +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif // FUSION_DECONV_ADD_BN_OP diff --git a/src/operators/fusion_deconv_add_bn_relu_op.cpp b/src/operators/fusion_deconv_add_bn_relu_op.cpp new file mode 100755 index 0000000000000000000000000000000000000000..b7e9abe660b350e9d3ccc89aef685505a7449a9f --- /dev/null +++ b/src/operators/fusion_deconv_add_bn_relu_op.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVADDBNRELU_OP + +#include "operators/fusion_deconv_add_bn_relu_op.h" + +namespace paddle_mobile { +namespace operators {} +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu, + ops::FusionDeconvAddBNReluMatcher); +#ifdef PADDLE_MOBILE_CPU +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp); +#endif + +#endif diff --git a/src/operators/fusion_deconv_add_bn_relu_op.h b/src/operators/fusion_deconv_add_bn_relu_op.h new file mode 100644 index 0000000000000000000000000000000000000000..97070ef01e544839be8eab6ddba21c43dfa9a26e --- /dev/null +++ b/src/operators/fusion_deconv_add_bn_relu_op.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#ifdef FUSION_DECONVADDBNRELU_OP +#pragma once +#include +#include + +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "operators/kernel/deconv_add_bn_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusionDeconvAddBNReluMatcher : public framework::FusionOpMatcher { + public: + FusionDeconvAddBNReluMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_BATCHNORM) > + std::make_shared(G_OP_TYPE_RELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}}, + {G_OP_TYPE_BATCHNORM, + {{"Scale", "Scale"}, + {"Mean", "Mean"}, + {"Bias", "Bias"}, + {"Variance", "Variance"}, + {"Y", "BNY"}}}}, + removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; } +}; + +template +class FusionDeconvAddBNReluOp + : public framework::OperatorWithKernel< + DeviceType, FusionDeconvAddBNReluParam, + operators::DeconvAddBNReluKernel> { + public: + FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionDeconvAddBNReluParam, + operators::DeconvAddBNReluKernel>( + type, inputs, outputs, attrs, scope) {} + + void InferShape() const { + auto input = this->param_.Input(); + auto in_dims = input->dims(); + + auto filter = this->param_.Filter(); + auto filter_dims = filter->dims(); + + std::vector strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + std::vector dilations = this->param_.Dilations(); + + int groups = this->param_.Groups(); + + PADDLE_MOBILE_ENFORCE( + in_dims.size() == 4 || in_dims.size() == 5, + 
"ConvTransposeOp intput should be 4-D or 5-D tensor."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() == filter_dims.size(), + "ConvTransposeOp input dimension and filter dimension " + "should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims.size() - strides.size() == 2U, + "ConvTransposeOp input dimension and strides dimension should " + "be consistent."); + PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(), + "ConvTransposeOp paddings dimension and strides " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(), + "ConvTransposeOp paddings dimension and dilations " + "dimension should be the same."); + PADDLE_MOBILE_ENFORCE( + in_dims[1] == filter_dims[0], + "In ConvTransposeOp, The number of input channels should " + "be equal to the number of filter's channels."); + + std::vector output_shape({in_dims[0], filter_dims[1] * groups}); + for (size_t i = 0; i < strides.size(); ++i) { + auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1; + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - + 2 * paddings[i] + filter_extent); + } + this->param_.Output()->Resize(framework::make_ddim(output_shape)); + } + + protected: +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif // FUSION_DECONV_ADD_BN_RELU_OP diff --git a/src/operators/kernel/deconv_add_bn_kernel.h b/src/operators/kernel/deconv_add_bn_kernel.h new file mode 100755 index 0000000000000000000000000000000000000000..181367031c0be48666efeda3df4426da38c67d4f --- /dev/null +++ b/src/operators/kernel/deconv_add_bn_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DECONVADDBN_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using framework::OpKernelBase; + +template +class DeconvAddBNKernel + : public OpKernelBase> { + public: + void Compute(const FusionDeconvAddBNParam ¶m); + + bool Init(FusionDeconvAddBNParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/deconv_add_bn_relu_kernel.h b/src/operators/kernel/deconv_add_bn_relu_kernel.h new file mode 100755 index 0000000000000000000000000000000000000000..c63b4db050ade64903ff817b40900faaef65924d --- /dev/null +++ b/src/operators/kernel/deconv_add_bn_relu_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_DECONVADDBNRELU_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using framework::OpKernelBase; + +template +class DeconvAddBNReluKernel + : public OpKernelBase> { + public: + void Compute(const FusionDeconvAddBNReluParam ¶m); + + bool Init(FusionDeconvAddBNReluParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp old mode 100644 new mode 100755 diff --git a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp index d1adec36adc73665d2e542b14b2e368830a2202d..5f8f85278e81911d67f1e072b390e6cd74149ee4 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp @@ -16,13 +16,10 @@ limitations under the License. 
*/ #include "operators/kernel/conv_bn_relu_kernel.h" #include - namespace paddle_mobile { namespace operators { - template <> bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { - // bool relu_enabled = true; paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::LEAKYRELU; int16_t leaky_relu_negative_slope = 0; @@ -43,7 +40,6 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { auto new_bias = new Tensor(); auto new_scale_ptr = new_scale->mutable_data({channel}); auto new_bias_ptr = new_bias->mutable_data({channel}); - for (int i = 0; i < channel; i++) { new_scale_ptr[i] = bn_scale_ptr[i] / static_cast(pow((bn_var_ptr[i] + epsilon), 0.5)); @@ -51,24 +47,36 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { bs_ptr[i + channel] = new_scale_ptr[i]; bs_ptr[i] = new_bias_ptr[i]; } - - fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); - fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, - leaky_relu_negative_slope, param->Groups(), - param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); - param->SetFpgaArgs(conv_arg); - + const int groups = param->Groups(); + if (groups == channel) { + fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr); + fpga::DWconvArgs dwconv_arg = {0}; + fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], new_bias_ptr); + param->SetFpgaArgs(dwconv_arg); + } else { + fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(conv_arg); + } delete new_scale; 
delete new_bias; return true; } - template <> void ConvBNReluKernel::Compute( const FusionConvBNReluParam ¶m) { - fpga::ComputeFpgaConv(param.FpgaArgs()); + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWConv(param.FpgaDwconvArgs()); + } else { + fpga::ComputeFpgaConv(param.FpgaArgs()); + } } } // namespace operators diff --git a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..788504df5d2ea1005cfaa76f12b58e61c0218391 --- /dev/null +++ b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp @@ -0,0 +1,89 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef CONV_TRANSPOSE_OP + +#include "operators/kernel/conv_transpose_kernel.h" +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvTransposeKernel::Init(ConvTransposeParam *param) { + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; + auto input = const_cast(param->Input()); + // const Tensor *bias = param->Bias(); + // auto bias_ptr = bias->data(); + auto filter = const_cast(param->Filter()); + auto out = param->Output(); + + // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], + // "Output channel should be equal to bias number"); + int channel = out->dims()[1]; + + int sub_conv_n = param->Strides()[0]; + auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n * // NOLINT + sizeof(float)); // NOLINT + + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = 1; + bs_ptr[i] = 0; // bias_ptr[i % (channel)]; + } + + PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0], + "stride_width should be equal to stride_height "); + PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3], + "filter width should be equal to filter height "); + PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), + "filter axis should be the multiple of stride axis "); + if (param->Groups() == channel) { + fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), + sub_conv_n); + fpga::DWDeconvArgs DWDeconv_arg = {0}; + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(DWDeconv_arg); + } else { + fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); + fpga::DeconvArgs deconv_arg = {0}; + 
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(deconv_arg); + } + return true; +} + +template <> +void ConvTransposeKernel::Compute( + const ConvTransposeParam ¶m) { + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWDeconv(param.FpgaDWDconvArgs()); + } else { + fpga::ComputeFpgaDeconv(param.FpgaArgs()); + } +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4239ac1e5da421cb0e2421a8919d8d15e40348af --- /dev/null +++ b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp @@ -0,0 +1,90 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_DECONVADDBN_OP + +#include "operators/kernel/deconv_add_bn_kernel.h" +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) { + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; + auto input = const_cast(param->Input()); + const Tensor *bias = param->InputBias(); + auto bias_ptr = bias->data(); + auto filter = const_cast(param->Filter()); + auto out = param->Output(); + + PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], + "Output channel should be equal to bias number"); + int channel = out->dims()[1]; + + int sub_conv_n = param->Strides()[0]; + auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n * // NOLINT + sizeof(float)); // NOLINT + + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = 1; + bs_ptr[i] = bias_ptr[i % (channel)]; + } + + PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0], + "stride_width should be equal to stride_height "); + PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3], + "filter width should be equal to filter height "); + PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), + "filter axis should be the multiple of stride axis "); + if (param->Groups() == channel) { + fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), + sub_conv_n); + fpga::DWDeconvArgs DWDeconv_arg = {0}; + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(DWDeconv_arg); + } else { + fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); + fpga::DeconvArgs deconv_arg = {0}; + 
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(deconv_arg); + } + return true; +} + +template <> +void DeconvAddBNKernel::Compute( + const FusionDeconvAddBNParam ¶m) { + // fpga::ComputeFpgaDeconv(param.FpgaArgs()); + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWDeconv(param.FpgaDWDconvArgs()); + } else { + fpga::ComputeFpgaDeconv(param.FpgaArgs()); + } +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp new file mode 100755 index 0000000000000000000000000000000000000000..28b8c83198a5517ed0dc9732e0033030a876a7da --- /dev/null +++ b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp @@ -0,0 +1,91 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#ifdef FUSION_DECONVADDBNRELU_OP
+
+#include "operators/kernel/deconv_add_bn_relu_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// NOTE(review): the flattened patch text had lost every angle-bracket argument
+// list and showed "&param" as a mis-encoded character. The <FPGA, float>,
+// <FPGA>, <Tensor *> and <float> spellings below are reconstructed; confirm
+// them against the repository before applying.
+
+// Builds the FPGA argument block for the fused deconv + add + BN + relu
+// operator and stores it on `param` through SetFpgaArgs. Always returns true;
+// unsupported shapes are rejected through PADDLE_MOBILE_ENFORCE.
+template <>
+bool DeconvAddBNReluKernel<FPGA, float>::Init(
+    FusionDeconvAddBNReluParam<FPGA> *param) {
+  // bool relu_enabled = true;
+  // LEAKYRELU with a negative slope of 0 is requested; presumably this is how
+  // a plain ReLU is expressed to the FPGA helpers -- confirm.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->InputBias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+
+  // sub_conv_n is the stride (both stride axes must match, see below). The
+  // buffer holds two float tables of channel * sub_conv_n entries each.
+  int sub_conv_n = param->Strides()[0];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+
+  // First table: the bias values repeated sub_conv_n times. Second table: all
+  // ones (presumably the per-channel scale -- confirm against the
+  // fpga::format_* helpers).
+  // NOTE(review): InputMean/InputScale/InputVariance are never read here, so
+  // batch-norm is presumably folded into InputBias upstream -- confirm.
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;
+    bs_ptr[i] = bias_ptr[i % (channel)];
+  }
+
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {
+    // One group per output channel: use the depthwise (DW) deconv helpers.
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;
+}
+
+// Runs the deconvolution prepared by Init. Uses the same
+// "groups == output channels" test as Init to choose between the depthwise
+// and the regular FPGA entry point.
+template <>
+void DeconvAddBNReluKernel<FPGA, float>::Compute(
+    const FusionDeconvAddBNReluParam<FPGA> &param) {
+  // fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f47a585ee412316ce65084c5fa10a622ffb93a4f
--- /dev/null
+++ b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+// Guarded like the other operator kernels in this patch: Pad2dParam is only
+// declared when PAD2D_OP is defined, so this file must not be compiled
+// without it.
+#ifdef PAD2D_OP
+
+#include "operators/kernel/pad2d_kernel.h"
+
+#include <cstring>
+
+namespace paddle_mobile {
+namespace operators {
+
+// NOTE(review): the flattened patch text had lost every angle-bracket argument
+// list and showed "&param" as a mis-encoded character. The <FPGA, float>,
+// <FPGA> and <half> spellings below are reconstructed; confirm them against
+// the repository before applying.
+
+// Calls fpga::format_fp16_ofm on the output tensor. Always returns true.
+template <>
+bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  fpga::format_fp16_ofm(output);
+  return true;
+}
+
+// Copies input_h rows of input_c * input_w half values from `input` into
+// `output`. Source rows are read back to back; destination rows start every
+// align_to_x(output_c * output_w, IMAGE_ALIGNMENT) elements.
+// Only the copied elements are written: the remainder of each destination row
+// and any additional destination rows are left untouched, and dims()[0] is
+// never consulted, so a single image is processed.
+// NOTE(review): the source stride is not aligned while the destination stride
+// is -- confirm the input really is densely packed.
+void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
+  auto input_data = (input->data<half>());
+  auto output_data = (output->data<half>());
+  auto input_c = input->dims()[1];
+  auto input_h = input->dims()[2];
+  auto input_w = input->dims()[3];
+  auto output_c = output->dims()[1];
+  auto output_w = output->dims()[3];
+  auto copysize = input_c * input_w;
+  // The destination row stride does not depend on the row index; compute it
+  // once instead of on every iteration.
+  auto output_stride =
+      paddle_mobile::fpga::align_to_x(output_c * output_w, IMAGE_ALIGNMENT);
+  for (int h = 0; h < input_h; ++h) {
+    auto input_offset = h * copysize;
+    auto output_offset = h * output_stride;
+    std::memcpy((output_data + output_offset), (input_data + input_offset),
+                copysize * sizeof(half));
+  }
+}
+
+// Pads on the CPU side: calls fpga_invalidate on the input range, copies the
+// rows with pad2dFunc, forwards the two scale entries from input to output,
+// and calls fpga_flush on the aligned output range.
+// (fpga_invalidate / fpga_flush are presumably cache-maintenance calls --
+// confirm against the fpga API.)
+template <>
+void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
+  auto in_x = param.InputX();
+  auto out = param.Out();
+  fpga::fpga_invalidate((void *)in_x->data<half>(),  // NOLINT
+                        in_x->numel() * sizeof(half));
+  pad2dFunc(in_x, out);
+  (out->scale)[0] = (in_x->scale)[0];
+  (out->scale)[1] = (in_x->scale)[1];
+  DLOG << (out->scale)[0];
+  DLOG << (out->scale)[1];
+  // Byte size of the output: rows (dims()[2]) times the aligned row length.
+  size_t outputSize =
+      out->dims()[2] *
+      paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
+                                      IMAGE_ALIGNMENT) *
+      sizeof(half);
+  fpga::fpga_flush(out->data<half>(), outputSize);
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif  // PAD2D_OP
diff --git a/src/operators/kernel/pad2d_kernel.h b/src/operators/kernel/pad2d_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..58b8c1a15884b00dc0c309c99da7de0706524cdd
--- /dev/null
+++ b/src/operators/kernel/pad2d_kernel.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+// Guarded because Pad2dParam is only declared when PAD2D_OP is defined.
+#ifdef PAD2D_OP
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Kernel interface of the pad2d operator. Init and Compute are only declared
+// here; this patch specialises them in kernel/fpga/V1/pad2d_kernel.cpp.
+// NOTE(review): the angle-bracket argument lists were missing from the
+// flattened patch text and are reconstructed -- confirm.
+template <typename DeviceType, typename T>
+class Pad2dKernel
+    : public framework::OpKernelBase<DeviceType, Pad2dParam<DeviceType>> {
+ public:
+  void Compute(const Pad2dParam<DeviceType> &param);
+  bool Init(Pad2dParam<DeviceType> *param);
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif  // PAD2D_OP
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 8cd804444a2d8f65d027ecccb240b5ada9aa274f..5683138ef1341a42c69fca33dc892a01e79736e4 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1221,6 +1221,7 @@ class FetchParam : public OpParam {
   RType *input_x_;
   Tensor *out_;
 #ifdef PADDLE_MOBILE_FPGA
+
  public:
   fpga::BypassArgs fpga_bypass_args;
 
@@ -2415,6 +2416,134 @@ class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
 template <typename Dtype>
 using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
 #endif
+#ifdef FUSION_DECONVADDBN_OP
+// Parameters of the fused deconv + add + batch-norm operator: the
+// ConvTransposeParam fields plus the bias and batch-norm inputs and the
+// "epsilon"/"momentum" attributes.
+// NOTE(review): the angle-bracket argument lists in this class were missing
+// from the flattened patch text and are reconstructed -- confirm.
+template <typename Dtype>
+class FusionDeconvAddBNParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDeconvAddBNParam(const VariableNameMap &inputs,
+                         const VariableNameMap &outputs,
+                         const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+
+  const RType *InputBias() const { return input_bias_; }
+
+  const RType *InputMean() const { return input_mean_; }
+
+  const RType *InputScale() const { return input_scale_; }
+
+  const RType *InputVariance() const { return input_variance_; }
+
+  const float &Epsilon() const { return epsilon_; }
+
+  const float &Momentum() const { return momentum_; }
+
+  const bool &IsTest() const { return is_test_; }
+
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+
+  const RType *NewScale() const { return new_scale_; }
+
+  const RType *NewBias() const { return new_bias_; }
+
+ protected:
+  RType *output_;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  // Default-initialised because the constructor never assigns these; the
+  // "is_test" attribute read above is commented out.
+  bool is_test_ = false;
+  RType *new_bias_ = nullptr;
+  RType *new_scale_ = nullptr;
+};
+#endif
+#ifdef FUSION_DECONVADDBNRELU_OP
+// Parameters of the fused deconv + add + batch-norm + relu operator: the
+// ConvTransposeParam fields plus the bias and batch-norm inputs and the
+// "epsilon"/"momentum" attributes.
+// NOTE(review): the angle-bracket argument lists in this class were missing
+// from the flattened patch text and are reconstructed -- confirm.
+template <typename Dtype>
+class FusionDeconvAddBNReluParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDeconvAddBNReluParam(const VariableNameMap &inputs,
+                             const VariableNameMap &outputs,
+                             const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+
+  const RType *InputBias() const { return input_bias_; }
+
+  const RType *InputMean() const { return input_mean_; }
+
+  const RType *InputScale() const { return input_scale_; }
+
+  const RType *InputVariance() const { return input_variance_; }
+
+  const float &Epsilon() const { return epsilon_; }
+
+  const float &Momentum() const { return momentum_; }
+
+  const bool &IsTest() const { return is_test_; }
+
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+
+  const RType *NewScale() const { return new_scale_; }
+
+  const RType *NewBias() const { return new_bias_; }
+
+ protected:
+  RType *output_;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  // Default-initialised because the constructor never assigns these; the
+  // "is_test" attribute read above is commented out.
+  bool is_test_ = false;
+  RType *new_bias_ = nullptr;
+  RType *new_scale_ = nullptr;
+};
+#endif
 
 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
@@ -3114,6 +3243,32 @@ class IncrementParam : public OpParam {
   int step_;
 };
 #endif  // INCREMENT_OP
+#ifdef PAD2D_OP
+// Parameters of the pad2d operator: the input looked up with InputXFrom and
+// the output looked up with OutFrom.
+// NOTE(review): `attrs` is never read, so no padding attribute is exposed to
+// the kernel or to InferShape -- confirm this is intended.
+// NOTE(review): the angle-bracket argument lists were missing from the
+// flattened patch text and are reconstructed -- confirm.
+template <typename Dtype>
+class Pad2dParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  Pad2dParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope) {
+    input_x_ = InputXFrom<GType>(inputs, scope);
+    out_ = OutFrom<GType>(outputs, scope);
+  }
+  const RType *InputX() const { return input_x_; }
+  RType *Out() const { return out_; }
+
+ private:
+  RType *input_x_;
+  RType *out_;
+};
+#endif
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/pad2d_op.cpp b/src/operators/pad2d_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e7eda00d0830f719f8d7aa76ab77544b585d9b45
--- /dev/null
+++ b/src/operators/pad2d_op.cpp
@@ -0,0 +1,43 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PAD2D_OP
+
+#include "operators/pad2d_op.h"
+namespace paddle_mobile {
+namespace operators {
+
+// Resizes the output to the input's shape with dims [2] and [3] each grown by
+// exactly one; dims [0] and [1] are copied unchanged.
+// NOTE(review): the growth is hard-coded to +1 and no padding attribute is
+// read -- confirm this matches every model that uses pad2d.
+// NOTE(review): the template header and <Dtype, T> were missing from the
+// flattened patch text and are reconstructed -- confirm.
+template <typename Dtype, typename T>
+void Pad2dOp<Dtype, T>::InferShape() const {
+  auto input_dims = this->param_.InputX()->dims();
+  auto input_n = input_dims[0];
+  auto input_c = input_dims[1];
+  auto input_h = input_dims[2];
+  auto input_w = input_dims[3];
+
+  this->param_.Out()->Resize({input_n, input_c, input_h + 1, input_w + 1});
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+// NOTE(review): this patch only adds an FPGA kernel specialisation for pad2d;
+// confirm a CPU kernel exists before relying on the CPU registration.
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(pad2d, ops::Pad2dOp);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2dOp);
+#endif
+
+#endif
diff --git a/src/operators/pad2d_op.h b/src/operators/pad2d_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..761e2b837d34b8d51629b883a8cd6797037e5d9b
--- /dev/null
+++ b/src/operators/pad2d_op.h
@@ -0,0 +1,49 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PAD2D_OP
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "framework/operator.h"
+#include "operators/kernel/pad2d_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+using framework::AttributeMap;
+using framework::OperatorWithKernel;
+using framework::Scope;
+using std::string;
+
+// Operator wrapper for pad2d: binds Pad2dParam and Pad2dKernel together via
+// OperatorWithKernel and declares the InferShape override.
+// NOTE(review): the header name after "#include" and every angle-bracket
+// argument list were missing from the flattened patch text; <string>,
+// <memory> (for std::shared_ptr) and the template arguments below are
+// reconstructed -- confirm against the repository.
+template <typename DeviceType, typename T>
+class Pad2dOp
+    : public OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                                operators::Pad2dKernel<DeviceType, T>> {
+ public:
+  Pad2dOp(const string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const AttributeMap &attrs,
+          std::shared_ptr<Scope> scope)
+      : OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                           operators::Pad2dKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/tools/op.cmake b/tools/op.cmake
old mode 100644
new mode 100755
index 0ceacaa15f6a37f580ea415401d76701908e8455..3b613473df8e7aa99276b864569ef55146bd0ad6
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -131,7 +131,12 @@ if (CON GREATER -1)
     set(PROPOSAL_OP ON)
     set(ANCHOR_GENERATOR_OP ON)
     set(SLICE_OP ON)
-
+    set(SIGMOID_OP ON)
+    set(CONCAT_OP ON)
+    set(PAD2D_OP ON)
+    set(CONV_TRANSPOSE_OP ON)
+    set(FUSION_DECONVADDBNRELU_OP ON)
+    set(FUSION_DECONVADDBN_OP ON)
     set(FOUND_MATCH ON)
 endif()
 
@@ -573,7 +578,6 @@ endif()
 if (FUSION_DECONVADDRELU_OP)
     add_definitions(-DFUSION_DECONVADDRELU_OP)
 endif()
-
 if (WHILE_OP)
     add_definitions(-DWHILE_OP)
 endif()
@@ -602,3 +606,12 @@ endif()
 if (ROI_PERSPECTIVE_OP)
     add_definitions(-DROI_PERSPECTIVE_OP)
 endif()
+if (FUSION_DECONVADDBNRELU_OP)
+    add_definitions(-DFUSION_DECONVADDBNRELU_OP)
+endif()
+if (FUSION_DECONVADDBN_OP)
+    add_definitions(-DFUSION_DECONVADDBN_OP)
+endif()
+if (PAD2D_OP)
+    add_definitions(-DPAD2D_OP)
+endif()