Commit 7306e1d0 authored by qnqinan, committed by GitHub

Merge pull request #696 from zhangyang0701/develop

add 3 FPGA ops for conv, close #695
......@@ -257,10 +257,12 @@ class Tensor {
struct FPGAArgs {
float scale;
inline float *scale_pointer() const { return &scale; }
inline float *scale_pointer() { return &scale; }
};
const struct FPGAArgs &fpga_args() const { return fpgaArgs_; }
struct FPGAArgs fpga_args() const {
return fpgaArgs_;
}
#endif
private:
......
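The hunk above drops the const qualifier from scale_pointer(). A minimal standalone sketch (not the real Tensor class) of why the const version cannot compile: inside a const member function, `this` is `const FPGAArgs *`, so `&scale` has type `const float *` and will not convert to the declared `float *` return type.

struct FPGAArgsSketch {
  float scale;
  // float *scale_pointer() const { return &scale; }  // error: cannot convert
  //                                                   // const float* to float*
  float *scale_pointer() { return &scale; }  // OK: scale is mutable here
};

int main() {
  FPGAArgsSketch args{1.5f};
  *args.scale_pointer() = 2.0f;  // callers may write through the pointer
  return 0;
}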
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/fusion_conv_add_bn_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddBNOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_bn, ops::FusionConvAddBNOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn, ops::FusionConvAddBNOp);
#endif
#endif
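InferShape seeds the output shape with the batch size and the filter count, then appends one spatial dimension per stride via math::ConvOutputSize. Assuming that helper implements the standard convolution arithmetic with symmetric padding (a sketch, not the library's definition):

#include <cassert>

// Standard convolution output-size formula with symmetric padding and dilation.
int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
                   int stride) {
  const int dkernel = dilation * (filter_size - 1) + 1;  // effective kernel span
  return (input_size + 2 * padding - dkernel) / stride + 1;
}

int main() {
  // 224x224 input, 3x3 filter, stride 2, pad 1, dilation 1 -> 112x112 output.
  assert(ConvOutputSize(224, 3, 1, 1, 2) == 112);
  return 0;
}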
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_kernel.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
class FusionConvAddBNMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddBNMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
{G_OP_TYPE_BATCHNORM,
{{"Scale", "Scale"},
{"Mean", "Mean"},
{"Bias", "Bias"},
{"Variance", "Variance"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_BN; }
};
template <typename DeviceType, typename T>
class FusionConvAddBNOp : public framework::OperatorWithKernel<
DeviceType, FusionConvAddBNParam,
operators::ConvAddBNKernel<DeviceType, T>> {
public:
FusionConvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddBNParam,
operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
#ifndef FUSION_CONV_ADD_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
new FusionConvAddBNMatcher());
#define FUSION_CONV_ADD_BN_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#ifndef FUSION_CONV_ADD_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
new FusionConvAddBNMatcher());
#define FUSION_CONV_ADD_BN_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_ADD_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
new FusionConvAddBNMatcher());
#define FUSION_CONV_ADD_BN_REGISTER
#endif
#endif
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_add_bn);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_bn);
#endif
#endif
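FusionConvAddBNMatcher overloads operator> to declare a linear pattern, conv2d followed by elementwise_add followed by batch_norm, and FolderNodes collapses a matched chain into a single fusion_conv_add_bn node while remapping each absorbed op's inputs (Y, Scale, Mean, Bias, Variance) onto the fused op. A toy standalone illustration of that collapse on a flat op list (the real matcher works on a graph; nothing below is a framework class):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> graph = {"feed", "conv2d", "elementwise_add",
                                    "batch_norm", "relu", "fetch"};
  const std::vector<std::string> pattern = {"conv2d", "elementwise_add",
                                            "batch_norm"};
  // Find the chain and splice in the fused op, as FolderNodes does on the graph.
  for (size_t i = 0; i + pattern.size() <= graph.size(); ++i) {
    if (std::equal(pattern.begin(), pattern.end(), graph.begin() + i)) {
      graph.erase(graph.begin() + i, graph.begin() + i + pattern.size());
      graph.insert(graph.begin() + i, "fusion_conv_add_bn");
      break;
    }
  }
  for (const auto &op : graph) std::cout << op << "\n";
  // prints: feed fusion_conv_add_bn relu fetch
}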
......@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#endif
......@@ -96,6 +96,13 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
new FusionConvAddBNReluMatcher());
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
} // namespace operators
......@@ -107,6 +114,7 @@ USE_OP_CPU(fusion_conv_add_bn_relu);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_bn_relu);
#endif
#endif
......@@ -54,6 +54,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#endif
#endif
......@@ -75,6 +75,13 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_RELU_REGISTER
#define CONV_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
new FusionConvAddReluOpMatcher());
#endif
#endif
} // namespace operators
......@@ -86,6 +93,7 @@ USE_OP_CPU(fusion_conv_add_relu);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_relu);
#endif
#endif
......@@ -12,21 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
  return true;
}
template <>
void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {}
template class ConvKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#ifdef FUSION_CONVADDBN_OP
#pragma once
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddBNKernel : public OpKernelBase<DeviceType, FusionConvAddBNParam> {
 public:
  void Compute(const FusionConvAddBNParam &param) const;
  bool Init(FusionConvAddBNParam *param);
};
} // namespace operators
} // namespace paddle_mobile
......
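The header above splits the kernel into a one-time Init and a per-run Compute. Judging from the FPGA kernels below, Init folds constants and caches the device arguments inside the param, and Compute only replays them. A tiny sketch of that assumed contract (FakeParam and FakeKernel are illustrative stand-ins, not framework types):

#include <cstdio>

struct FakeParam {
  int cached_args = 0;  // stands in for the cached fpga::ConvArgs
};

struct FakeKernel {
  bool Init(FakeParam *param) {  // one-time: fold constants, cache device args
    param->cached_args = 42;
    return true;
  }
  void Compute(const FakeParam &param) const {  // per-run: reuse cached args
    std::printf("launch with args %d\n", param.cached_args);
  }
};

int main() {
  FakeParam p;
  FakeKernel k;
  if (k.Init(&p)) k.Compute(p);  // Init once, Compute per inference
  return 0;
}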
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_kernel.h"
#include "fpga/api/fpga_api.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
bool relu_enabled = false;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
auto bn_bias_ptr = param->InputBias()->data<float>();
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
bias->dims()[0] == param->InputBias()->dims()[0],
"Image channel should be equal to bias number");
const int channel = input->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
auto new_scale_ptr = new_scale->mutable_data<float>({channel});
auto new_bias_ptr = new_bias->mutable_data<float>({channel});
for (int i = 0; i < channel; i++) {
new_scale_ptr[i] = bn_scale_ptr[i] /
static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
new_bias_ptr[i] =
bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
bs_ptr[i * 2] = new_scale_ptr[i];
bs_ptr[i * 2 + 1] = new_bias_ptr[i];
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
return true;
}
template <>
void ConvAddBNKernel<FPGA, float>::Compute(
const FusionConvAddBNParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
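The folding loop in Init merges the batch norm and the elementwise bias into one per-channel affine pair, so the FPGA applies a single scale-and-shift after the convolution. With \gamma, \beta, \mu, \sigma^2 the BN scale, bias, mean and variance, b the elementwise bias, and \epsilon the stabilizer, the loop computes per channel c:

\hat{\gamma}_c = \frac{\gamma_c}{\sqrt{\sigma_c^2 + \epsilon}}, \qquad
\hat{\beta}_c = \beta_c + (b_c - \mu_c)\,\hat{\gamma}_c

which gives \mathrm{BN}(\mathrm{conv}(x)_c + b_c) = \hat{\gamma}_c \cdot \mathrm{conv}(x)_c + \hat{\beta}_c. bs_ptr stores the pairs interleaved as (scale, bias) per channel, evidently the layout the hardware expects at sb_address.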
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "memory/t_malloc.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
bool relu_enabled = true;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
auto bn_bias_ptr = param->InputBias()->data<float>();
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
bias->dims()[0] == param->InputBias()->dims()[0],
"Image channel should be equal to bias number");
const int channel = input->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
auto new_scale_ptr = new_scale->mutable_data<float>({channel});
auto new_bias_ptr = new_bias->mutable_data<float>({channel});
for (int i = 0; i < channel; i++) {
new_scale_ptr[i] = bn_scale_ptr[i] /
static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
new_bias_ptr[i] =
bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
bs_ptr[i * 2] = new_scale_ptr[i];
bs_ptr[i * 2 + 1] = new_bias_ptr[i];
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
return true;
}
template <>
void ConvAddBNReluKernel<FPGA, float>::Compute(
const FusionConvAddBNReluParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
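This Init is identical to the ConvAddBN kernel above except that relu_enabled is true, so the fused ReLU is presumably applied by the FPGA in the same pass rather than as a separate op; a shared helper parameterized on that flag would remove the duplicated folding and argument-packing code.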
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP
#include "operators/kernel/conv_add_relu_kernel.h"
#include "common/enforce.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
bool relu_enabled = true;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0],
"Image channel should be equal to bias number");
int channel = input->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
for (int i = 0; i < channel; i++) {
bs_ptr[i * 2] = 1;
bs_ptr[i * 2 + 1] = bias_ptr[i];
}
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
return true;
}
template <>
void ConvAddReluKernel<FPGA, float>::Compute(
const FusionConvAddReluParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
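With no batch norm to fold, the scale half of each (scale, bias) pair degenerates to 1 and the elementwise bias passes through unchanged. A small standalone sketch of that interleaved layout (assuming, as the kernels above suggest, that sb_address expects per-channel (scale, bias) pairs):

#include <cstdio>
#include <vector>

int main() {
  const int channel = 4;
  const std::vector<float> bias = {0.1f, 0.2f, 0.3f, 0.4f};
  std::vector<float> bs(2 * channel);
  for (int i = 0; i < channel; i++) {
    bs[i * 2] = 1.0f;         // identity scale: nothing to fold without BN
    bs[i * 2 + 1] = bias[i];  // elementwise-add bias passes through
  }
  for (int i = 0; i < channel; i++)
    std::printf("channel %d: scale=%.1f bias=%.1f\n", i, bs[i * 2], bs[i * 2 + 1]);
  return 0;
}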
......@@ -1136,7 +1136,7 @@ class FusionConvAddBNParam : public OpParam {
const Tensor *Filter() const { return filter_; }
Tensor *OutputY() const { return output_y_; }
Tensor *Output() const { return output_y_; }
const vector<int> &Strides() const { return strides_; }
......
......@@ -21,6 +21,7 @@ elseif("resnet" IN_LIST NET)
# gen test
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile)
elseif("FPGAnets" IN_LIST NET)
else ()
# gen test
......