diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp index f91c21beb2d6b5fbce86b56d49b7d8c6a3ec9219..779c846d1f3c465e5113f805b2b3856a1a7894c5 100644 --- a/src/fpga/api/fpga_api.cpp +++ b/src/fpga/api/fpga_api.cpp @@ -35,7 +35,7 @@ namespace fpga { static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static inline int do_ioctl(int req, void *arg) { +static inline int do_ioctl(int req, const void *arg) { return ioctl(req, (unsigned int64_t)arg); } @@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); } +int ComputeFpgaConv(const struct ConvArgs &args) { + return do_ioctl(IOCTL_CONFIG_CONV, &args); +} int ComputeFpgaPool(const struct PoolingArgs &args) { - return do_ioctl(22, &args); + return do_ioctl(IOCTL_CONFIG_POOLING, &args); } int ComputeFpgaEWAdd(const struct EWAddArgs &args) { - return do_ioctl(23, &args); + return do_ioctl(IOCTL_CONFIG_EW, &args); +} +int PerformBypass(const struct BypassArgs &args) { + return do_ioctl(IOCTL_CONFIG_BYPASS, &args); } } // namespace fpga diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h index 08635cdb5c01b50f59eb35554bba9a7b70f6ebfb..0823e19a7f9dfaba709b6ad2723e3228c27e2e0f 100644 --- a/src/fpga/api/fpga_api.h +++ b/src/fpga/api/fpga_api.h @@ -86,12 +86,12 @@ struct ImageOutputArgs { struct ConvArgs { bool relu_enabled; - void* bias_address; + void* sb_address; // scale and bias are interlaced; void* filter_address; + float* filter_scale_address; uint32_t filter_num; uint32_t group_num; - void* sb_address; // scale and bias are interlaced; struct KernelArgs kernel; struct ImageInputArgs image; // input image; struct ImageOutputArgs output; @@ -116,6 +116,7 @@ struct EWAddArgs { struct BypassArgs { enum DataConvertType convert_type; + enum LayoutConvertType layout_type; struct ImageInputArgs image; struct ImageOutputArgs output; }; @@ -125,11 +126,6 @@ struct FpgaRegWriteArgs { uint64_t value; }; -struct FpgaRegReadArgs { - uint64_t address; - uint64_t value; -}; - #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) @@ -143,6 +139,7 @@ struct FpgaRegReadArgs { #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs) #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs) #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs) +#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs) #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) @@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE { //============================== API ============================= +int PerformBypass(const struct BypassArgs& args); int ComputeFpgaConv(const struct ConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaEWAdd(const struct EWAddArgs& args); diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h index d2d2d61835de84c94760c10a25a973d4eaff1fbe..7a1df04732580c7225423cedeb277beca3edc154 100644 --- a/src/fpga/fpga_quantilization.h +++ b/src/fpga/fpga_quantilization.h @@ -13,55 +13,40 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include #include "common/types.h" #include "framework/lod_tensor.h" -#include "framework/operator.h" -#include "framework/scope.h" #include "framework/tensor.h" namespace paddle_mobile { -bool is_conv(std::string type) { - if (type.compare(G_OP_TYPE_CONV) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) { - return true; - } - return false; -} - template -void quantilize_op(std::shared_ptr> op, - std::shared_ptr scope) { - if (!is_conv(op.get()->Type())) { - return; - } - framework::Tensor* filter = nullptr; - auto var_vec = op.get()->Inputs().at("Filter"); - if (!var_vec.empty()) { - auto var = scope.get()->FindVar(var_vec[0]); - filter = var->template GetMutable(); - } +framework::Tensor* quantilize_filter(framework::Tensor* filter) { float scale = 0; - // 32bit filter -> 8bit filter; + float min = 0f; + float max = 0f; if (filter->type() == typeid(float)) { + float* floatData = originalFilter->data(); + for (int i = 0; i < filter->numel(); ++i) { + min = std::min(min, floatData[i]); + max = std::max(max, floatData[i]); + } + + float fix_range = (float)((1 << (8 - 1)) - 1); + float float_range = max; + scale = (float_range / fix_range); + framework::Tensor* originalFilter = filter; framework::Tensor* quantFilter = new framework::Tensor(); - float* floatData = originalFilter->data(); int8_t* intData = quantFilter->mutable_data(); - } + for (int i = 0; i < filter->numel(); ++i) { + intData[i] = (int8_t)floatData[i] * scale; + } + quantFilter.scale = scale; + // NCHW -> NHWC; + return quantFilter; + } + return filter; } } // namespace paddle_mobile diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 8d9407e8ee25a4dadbee16713324f4afa90bb03f..364f79cc84b5a3f4c2aa1838961eb092a9b842f0 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -257,10 +257,10 @@ class Tensor { struct FPGAArgs { float scale; - inline float *scale_pointer() const { return &scale; } + inline const float *scale_pointer() const { return &scale; } }; - const struct FPGAArgs &fpga_args() const { return fpgaArgs_; } + const struct FPGAArgs fpga_args() const { return fpgaArgs_; } #endif private: diff --git a/src/io/executor.cpp b/src/io/executor.cpp index c09fe2c58532437336307ce007532d43689d8fd2..d6434b64aa752fd62bc637a882298228d59880b8 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -32,10 +32,6 @@ limitations under the License. */ #include "common/threadpool.h" #endif -#ifdef PADDLE_MOBILE_FPGA -#include "fpga/fpga_quantilization.h" -#endif - namespace paddle_mobile { using framework::Variable; @@ -100,11 +96,6 @@ Executor::Executor(const framework::Program p, int batch_size, for (const auto &op : ops) { op->Init(); } -#ifdef PADDLE_MOBILE_FPGA - for (const auto &op : ops) { - quantilize_op(op, program_.scope); - } -#endif } template diff --git a/src/operators/fusion_conv_add_bn_op.cpp b/src/operators/fusion_conv_add_bn_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5b61bf5d390cc2904a3f40f5400a5a3eec9a2dd5 --- /dev/null +++ b/src/operators/fusion_conv_add_bn_op.cpp @@ -0,0 +1,61 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDBN_OP + +#include "operators/fusion_conv_add_bn_op.h" +#include "operators/math/conv_func.h" + +namespace paddle_mobile { +namespace operators { + +template +void FusionConvAddBNOp::InferShape() const { + auto in_dims = this->param_.Input()->dims(); + auto filter_dims = this->param_.Filter()->dims(); + const std::vector &strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + int groups = this->param_.Groups(); + std::vector dilations = this->param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back( + math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], + paddings[i], strides[i])); + } + + framework::DDim ddim = framework::make_ddim(output_shape); + this->param_.Output()->Resize(ddim); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(fusion_conv_add_bn, ops::FusionConvAddBNOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_conv_add_bn, ops::FusionConvAddBNOp); +#endif + +#endif diff --git a/src/operators/fusion_conv_add_bn_op.h b/src/operators/fusion_conv_add_bn_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7a7f6b2bababd3f5d36d7b6faf60069567d45423 --- /dev/null +++ b/src/operators/fusion_conv_add_bn_op.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDBN_OP + +#pragma once + +#include +#include +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "op_param.h" +#include "operators/kernel/conv_add_bn_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusionConvAddBNMatcher : public framework::FusionOpMatcher { + public: + FusionConvAddBNMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_BATCHNORM); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}, + {G_OP_TYPE_BATCHNORM, + {{"Scale", "Scale"}, + {"Mean", "Mean"}, + {"Bias", "Bias"}, + {"Variance", "Variance"}}}}, + removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_BN; } +}; + +template +class FusionConvAddBNOp : public framework::OperatorWithKernel< + DeviceType, FusionConvAddBNParam, + operators::ConvAddBNKernel> { + public: + FusionConvAddBNOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionConvAddBNParam, + operators::ConvAddBNKernel>(type, inputs, outputs, + attrs, scope) {} + + void InferShape() const override; + + protected: +}; + +#ifdef PADDLE_MOBILE_CPU + +#ifndef FUSION_CONV_ADD_BN_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( + new FusionConvAddBNMatcher()); +#define FUSION_CONV_ADD_BN_REGISTER +#endif + +#endif + +#ifdef PADDLE_MOBILE_MALI_GPU + +#ifndef FUSION_CONV_ADD_BN_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( + new FusionConvAddBNMatcher()); +#define FUSION_CONV_ADD_BN_REGISTER +#endif + +#endif + +#ifdef PADDLE_MOBILE_FPGA + +#ifndef FUSION_CONV_ADD_BN_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( + new FusionConvAddBNMatcher()); +#define FUSION_CONV_ADD_BN_REGISTER +#endif + +#endif + +} // namespace operators +} // namespace paddle_mobile + +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fusion_conv_add_bn); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_conv_add_bn); +#endif + +#endif diff --git a/src/operators/fusion_conv_add_bn_relu_op.cpp b/src/operators/fusion_conv_add_bn_relu_op.cpp index 16f4650a64ec0c363d5fa94ee27c15c73cf58a70..793634eec392fabe6c7399127ec9cb3e187697bc 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.cpp +++ b/src/operators/fusion_conv_add_bn_relu_op.cpp @@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); #endif #endif diff --git a/src/operators/fusion_conv_add_bn_relu_op.h b/src/operators/fusion_conv_add_bn_relu_op.h index 19e33465c06921e9a6a7beb77053f05a03a6c760..54e7e58f8af4111edd0b86c85bb1cffc87f5cd22 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.h +++ b/src/operators/fusion_conv_add_bn_relu_op.h @@ -96,6 +96,13 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( #endif #ifdef PADDLE_MOBILE_FPGA + +#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( + new FusionConvAddBNReluMatcher()); +#define FUSION_CONV_ADD_BN_RELU_REGISTER +#endif + #endif } // namespace operators @@ -107,6 +114,7 @@ USE_OP_CPU(fusion_conv_add_bn_relu); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_conv_add_bn_relu); #endif #endif diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp index 18618886cccba08c7502b3e1d75fbba9b6916f56..99b770a6c5e3bc89024e467631e129b914f0bcec 100644 --- a/src/operators/fusion_conv_add_relu_op.cpp +++ b/src/operators/fusion_conv_add_relu_op.cpp @@ -54,6 +54,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp); #endif #endif diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index 50a4a2c7c64526c9a5dc1057829ed14f09357780..cda97ba1a342e5b9451fd8363643f638792e3579 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -75,6 +75,13 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA + +#ifndef CONV_ADD_RELU_REGISTER +#define CONV_ADD_RELU_REGISTER +static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( + new FusionConvAddReluOpMatcher()); +#endif + #endif } // namespace operators @@ -86,6 +93,7 @@ USE_OP_CPU(fusion_conv_add_relu); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_conv_add_relu); #endif #endif diff --git a/src/operators/kernel/central-arm-func/mul_arm_func.h b/src/operators/kernel/central-arm-func/mul_arm_func.h index 9dfb1f48a574156f1b026fc6af3a03d77b81263f..d2da67afe1d2eb746971a2443bdb449eb2b66ec4 100644 --- a/src/operators/kernel/central-arm-func/mul_arm_func.h +++ b/src/operators/kernel/central-arm-func/mul_arm_func.h @@ -19,6 +19,40 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { +// 1、如果x,y维度都是2维, +// x = [[1,2], y = [[5,6], +// [3,4]] [7,8]] +// 运算结果为正常矩阵相乘。结果 out = +// [[1*5+2*7,1*6+2*8],[3*5+4*7, 3*6+4*8]] +// +// 2、如果x的维度大于2或者y的维度大于2,x的维度(2,3,4) ,y的维度(4,1,2) +// x = [[[1,2,3,4], +// [2,3,4,5], +// [3,4,5,6]], +// [[1,2,3,4], +// [2,3,4,5], +// [3,4,5,6]]] +// y = [[[1,2]], +// [[3,4]], +// [[5,6]], +// [[7,8]]] +// 需要借助x_num_col_dims和y_num_col_dims将x和y的维度转换为2维 +// 从模型中读到参数,x_num_col_dims = 2,y_num_col_dims = 1,左开右闭 +// (1) 将x = (2,3,4)的index [0,x_num_col_dims)部分2,3相乘,得到6, +// [x_num_col_dims,xdim.size())部分4相乘,得到4, +// 将Tensor x的dims重写成(6,4) +// (2) 将y = (4,1,2)的index [0,y_num_col_dims)部分4相乘,得到4, +// [y_num_col_dims,ydim.size())部分1,2相乘,得到2, +// 将Tensor y的dims重写成(4,2) +// 并不影响x,y在内存中的分布。 +// x = [[1,2,3,4], y = [[1,2], +// [2,3,4,5], [3,4], +// [3,4,5,6], 矩阵乘法 [5,6], +// [1,2,3,4], [7,8]] +// [2,3,4,5], +// [3,4,5,6]] +// 结果x(6行4列)乘y(4行2列),按1中矩阵相乘,结果out(6行2列) + template void MulCompute(const MulParam ¶m) { const Tensor *input_x = param.InputX(); diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/conv_add_bn_kernel.h similarity index 54% rename from src/operators/kernel/fpga/conv_kernel.cpp rename to src/operators/kernel/conv_add_bn_kernel.h index dc537362a216983974bea325433c456136356fc8..cc11ef1d71f402f32b2da6490877626247884a44 100644 --- a/src/operators/kernel/fpga/conv_kernel.cpp +++ b/src/operators/kernel/conv_add_bn_kernel.h @@ -12,21 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef CONV_OP +#pragma once -#include "operators/kernel/conv_kernel.h" +#ifdef FUSION_CONVADDBN_OP + +#include +#include "framework/ddim.h" +#include "framework/operator.h" +#include "operators/math/conv_func.h" +#include "operators/math/im2col.h" +#include "operators/math/math_function.h" +#include "operators/math/vol2col.h" +#include "operators/op_param.h" namespace paddle_mobile { namespace operators { -template <> -bool ConvKernel::Init(ConvParam *param) { - return true; -} +using framework::DDim; +using framework::OpKernelBase; -template <> -void ConvKernel::Compute(const ConvParam ¶m) const {} -template class ConvKernel; +template +class ConvAddBNKernel : public OpKernelBase { + public: + void Compute(const FusionConvAddBNParam ¶m) const; + bool Init(FusionConvAddBNParam *param); +}; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f9da6bc1dde924e2c499bb2478d29a8d4a9e5d9 --- /dev/null +++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDBN_OP + +#include "operators/kernel/conv_add_bn_kernel.h" +#include "fpga/api/fpga_api.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { + bool relu_enabled = false; + const Tensor *input = param->Input(); + auto input_ptr = input->data(); + const Tensor *bias = param->Bias(); + auto bias_ptr = bias->data(); + const Tensor *filter = param->Filter(); + auto filter_ptr = filter->data(); + Tensor *out = param->Output(); + auto out_ptr = out->mutable_data(); + auto bn_mean_ptr = param->InputMean()->data(); + auto bn_var_ptr = param->InputVariance()->data(); + auto bn_scale_ptr = param->InputScale()->data(); + auto bn_bias_ptr = param->InputBias()->data(); + const float epsilon = param->Epsilon(); + PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] && + bias->dims()[0] == param->InputBias()->dims()[0], + "Image channel should be equal to bias number"); + + const int channel = input->dims()[1]; + float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({channel}); + auto new_bias_ptr = new_bias->mutable_data({channel}); + + for (int i = 0; i < channel; i++) { + new_scale_ptr[i] = bn_scale_ptr[i] / + static_cast(pow((bn_var_ptr[i] + epsilon), 0.5)); + new_bias_ptr[i] = + bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i]; + bs_ptr[i * 2] = new_scale_ptr[i]; + bs_ptr[i * 2 + 1] = new_bias_ptr[i]; + } + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + + fpga::ConvArgs convArgs; + convArgs.relu_enabled = relu_enabled; + convArgs.filter_address = (void *)filter_ptr; + convArgs.filter_num = filter->dims()[0]; + convArgs.group_num = param->Groups(); + convArgs.sb_address = (void *)bs_ptr; + convArgs.kernel.stride_h = param->Strides()[0]; + convArgs.kernel.stride_w = param->Strides()[1]; + convArgs.kernel.height = filter->dims()[2]; + convArgs.kernel.width = filter->dims()[3]; + convArgs.image.address = (void *)input_ptr; + convArgs.image.channels = input->dims()[1]; + convArgs.image.height = input->dims()[2]; + convArgs.image.width = input->dims()[3]; + convArgs.image.pad_height = param->Paddings()[0]; + convArgs.image.pad_width = param->Paddings()[1]; + convArgs.image.scale_address = input->fpga_args().scale_pointer(); + convArgs.output.address = (void *)out_ptr; + convArgs.output.scale_address = out->fpga_args().scale_pointer(); + param->SetFpgaArgs(convArgs); + return true; +} + +template <> +void ConvAddBNKernel::Compute( + const FusionConvAddBNParam ¶m) const { + fpga::ComputeFpgaConv(param.FpgaArgs()); +} +template class ConvAddBNKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..66a593df84c12f87371a9bde9f0aef514b392584 --- /dev/null +++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDBNRELU_OP + +#include "operators/kernel/conv_add_bn_relu_kernel.h" +#include "memory/t_malloc.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddBNReluKernel::Init(FusionConvAddBNReluParam *param) { + bool relu_enabled = true; + const Tensor *input = param->Input(); + auto input_ptr = input->data(); + const Tensor *bias = param->Bias(); + auto bias_ptr = bias->data(); + const Tensor *filter = param->Filter(); + auto filter_ptr = filter->data(); + Tensor *out = param->Output(); + auto out_ptr = out->mutable_data(); + auto bn_mean_ptr = param->InputMean()->data(); + auto bn_var_ptr = param->InputVariance()->data(); + auto bn_scale_ptr = param->InputScale()->data(); + auto bn_bias_ptr = param->InputBias()->data(); + const float epsilon = param->Epsilon(); + PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] && + bias->dims()[0] == param->InputBias()->dims()[0], + "Image channel should be equal to bias number"); + + const int channel = input->dims()[1]; + float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({channel}); + auto new_bias_ptr = new_bias->mutable_data({channel}); + + for (int i = 0; i < channel; i++) { + new_scale_ptr[i] = bn_scale_ptr[i] / + static_cast(pow((bn_var_ptr[i] + epsilon), 0.5)); + new_bias_ptr[i] = + bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i]; + bs_ptr[i * 2] = new_scale_ptr[i]; + bs_ptr[i * 2 + 1] = new_bias_ptr[i]; + } + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + + fpga::ConvArgs convArgs; + convArgs.relu_enabled = relu_enabled; + convArgs.filter_address = (void *)filter_ptr; + convArgs.filter_num = filter->dims()[0]; + convArgs.group_num = param->Groups(); + convArgs.sb_address = (void *)bs_ptr; + convArgs.kernel.stride_h = param->Strides()[0]; + convArgs.kernel.stride_w = param->Strides()[1]; + convArgs.kernel.height = filter->dims()[2]; + convArgs.kernel.width = filter->dims()[3]; + convArgs.image.address = (void *)input_ptr; + convArgs.image.channels = input->dims()[1]; + convArgs.image.height = input->dims()[2]; + convArgs.image.width = input->dims()[3]; + convArgs.image.pad_height = param->Paddings()[0]; + convArgs.image.pad_width = param->Paddings()[1]; + convArgs.image.scale_address = input->fpga_args().scale_pointer(); + convArgs.output.address = (void *)out_ptr; + convArgs.output.scale_address = out->fpga_args().scale_pointer(); + param->SetFpgaArgs(convArgs); + return true; +} + +template <> +void ConvAddBNReluKernel::Compute( + const FusionConvAddBNReluParam ¶m) const { + fpga::ComputeFpgaConv(param.FpgaArgs()); +} +template class ConvAddBNReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9692bcef872f956e2cdbe82545b3ab4173bf1348 --- /dev/null +++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDRELU_OP + +#include "operators/kernel/conv_add_relu_kernel.h" +#include "common/enforce.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { + bool relu_enabled = true; + const Tensor *input = param->Input(); + auto input_ptr = input->data(); + const Tensor *bias = param->Bias(); + auto bias_ptr = bias->data(); + const Tensor *filter = param->Filter(); + auto filter_ptr = filter->data(); + Tensor *out = param->Output(); + auto out_ptr = out->mutable_data(); + + PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0], + "Image channel should be equal to bias number"); + int channel = input->dims()[1]; + float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + for (int i = 0; i < channel; i++) { + bs_ptr[i * 2] = 1; + bs_ptr[i * 2 + 1] = bias_ptr[i]; + } + + fpga::ConvArgs convArgs; + convArgs.relu_enabled = relu_enabled; + convArgs.filter_address = (void *)filter_ptr; + convArgs.filter_num = filter->dims()[0]; + convArgs.group_num = param->Groups(); + convArgs.sb_address = (void *)bs_ptr; + convArgs.kernel.stride_h = param->Strides()[0]; + convArgs.kernel.stride_w = param->Strides()[1]; + convArgs.kernel.height = filter->dims()[2]; + convArgs.kernel.width = filter->dims()[3]; + convArgs.image.address = (void *)input_ptr; + convArgs.image.channels = input->dims()[1]; + convArgs.image.height = input->dims()[2]; + convArgs.image.width = input->dims()[3]; + + convArgs.image.pad_height = param->Paddings()[0]; + convArgs.image.pad_width = param->Paddings()[1]; + convArgs.image.scale_address = input->fpga_args().scale_pointer(); + convArgs.output.address = (void *)out_ptr; + convArgs.output.scale_address = out->fpga_args().scale_pointer(); + param->SetFpgaArgs(convArgs); + return true; +} + +template <> +void ConvAddReluKernel::Compute( + const FusionConvAddReluParam ¶m) const { + fpga::ComputeFpgaConv(param.FpgaArgs()); +} +template class ConvAddReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 88c1886ad7ade5960d1d8175a1b46e12363ca849..0821ab8c32a6ba232a673ddd100a4e7fe6475571 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -1136,7 +1136,7 @@ class FusionConvAddBNParam : public OpParam { const Tensor *Filter() const { return filter_; } - Tensor *OutputY() const { return output_y_; } + Tensor *Output() const { return output_y_; } const vector &Strides() const { return strides_; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8839079fecfdbefcdaff85354d3a6a8208af10ee..5072db53874e0becf1318a26633fb13cc33d07f4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,6 +21,7 @@ elseif("resnet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-resnet paddle-mobile) +elseif("FPGAnets" IN_LIST NET) else () # gen test