提交 bd00d449 编写于 作者: C Chon 提交者: GitHub

Merge pull request #769 from zhangyang0701/develop

Need to realize conv_bn & conv_bn_relu for FPGA googlenet close #768
...@@ -41,7 +41,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> { ...@@ -41,7 +41,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); } void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
void Init() { void Init() {
const Tensor *input = param_.InputX(); const Tensor *input = param_.InputX();
auto input_ptr = input->mutable_data<float>(); auto input_ptr = (const_cast<Tensor *>(input))->mutable_data<float>();
Tensor *output = param_.Out(); Tensor *output = param_.Out();
auto output_ptr = output->mutable_data<half>(); auto output_ptr = output->mutable_data<half>();
fpga::BypassArgs args; fpga::BypassArgs args;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#include "operators/fusion_conv_bn_op.h"
namespace paddle_mobile {
namespace operators {
/**
 * Derives the output shape of the fused conv + batch-norm operator and
 * resizes the output tensor to it.
 *
 * The first two output dimensions are taken from the input's dimension 0 and
 * the filter's dimension 0. Every following dimension is produced by
 * math::ConvOutputSize from the matching input/filter extent together with
 * the dilation, padding and stride for that axis.
 *
 * The convolution attributes are checked for consistency first: input and
 * filter must have the same rank, and dilations, paddings and strides must
 * all have the same length.
 */
template <typename Dtype, typename T>
void FusionConvBNOp<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();

  // The param accessors hand out const references, so bind to them directly
  // instead of copying each vector on every shape inference.
  const std::vector<int> &strides = this->param_.Strides();
  const std::vector<int> &paddings = this->param_.Paddings();
  const std::vector<int> &dilations = this->param_.Dilations();

  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
                         dilations.size() == paddings.size() &&
                         paddings.size() == strides.size()),
                        "ConvParam is not suitable");

  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  output_shape.reserve(output_shape.size() + strides.size());
  for (size_t i = 0; i < strides.size(); ++i) {
    // Dimensions 0 and 1 are handled above, hence the offset of 2.
    output_shape.push_back(
        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
                             paddings[i], strides[i]));
  }

  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
// Short alias used by the registration macros below.
namespace ops = paddle_mobile::operators;
// Register ops::FusionConvBNOp under the identifier fusion_conv_bn for every
// backend that is enabled at build time.
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_bn, ops::FusionConvBNOp);
#endif
// Nothing is registered for the Mali GPU backend.
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_bn, ops::FusionConvBNOp);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_bn_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
// Fusion matcher for the pattern "G_OP_TYPE_CONV node followed by a
// G_OP_TYPE_BATCHNORM node"; matched nodes are folded into an op of type
// G_OP_TYPE_FUSION_CONV_BN (see Type()).
class FusionConvBNMatcher : public framework::FusionOpMatcher {
public:
// Builds the pattern: a conv node combined with a batch-norm node through
// Node's overloaded `>`.
// NOTE(review): `>` presumably appends the right-hand node as the successor
// of node_ - confirm against framework::Node.
FusionConvBNMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
}
// Folds the matched nodes starting at `node` by delegating to Node::Folder
// with the pattern depth, the fused op type and a name table for the
// batch-norm entries Scale, Mean, Bias and Variance (each mapped to the same
// name). `removed_nodes` is forwarded to Folder unchanged.
// NOTE(review): presumably Folder appends the nodes it removes to
// `removed_nodes` - confirm against framework::Node::Folder.
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_BATCHNORM,
{{"Scale", "Scale"},
{"Mean", "Mean"},
{"Bias", "Bias"},
{"Variance", "Variance"}}}},
removed_nodes);
}
// Type name of the fused operator produced by this matcher.
std::string Type() { return G_OP_TYPE_FUSION_CONV_BN; }
};
/**
 * Operator for the fused conv + batch-norm op.
 *
 * Binds FusionConvBNParam and ConvBNKernel<DeviceType, T> together through
 * framework::OperatorWithKernel; the only behaviour added here is the shape
 * inference declared by InferShape().
 */
template <typename DeviceType, typename T>
class FusionConvBNOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionConvBNParam,
          operators::ConvBNKernel<DeviceType, T>> {
  // Shorthand for the (lengthy) base class type.
  using BaseOp = framework::OperatorWithKernel<
      DeviceType, FusionConvBNParam, operators::ConvBNKernel<DeviceType, T>>;

 public:
  // Forwards every argument to the OperatorWithKernel base unchanged.
  FusionConvBNOp(const string &type, const VariableNameMap &inputs,
                 const VariableNameMap &outputs,
                 const framework::AttributeMap &attrs,
                 std::shared_ptr<framework::Scope> scope)
      : BaseOp(type, inputs, outputs, attrs, scope) {}

  // Computes and applies the output tensor shape.
  void InferShape() const override;
};
// Register the conv + batch-norm fusion matcher once when either the CPU or
// the FPGA backend is enabled. FUSION_CONV_BN_REGISTER guards against a second
// definition of the registrar.
#if defined(PADDLE_MOBILE_CPU) || defined(PADDLE_MOBILE_FPGA)
#ifndef FUSION_CONV_BN_REGISTER
#define FUSION_CONV_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_registrar(
    new FusionConvBNMatcher());
#endif
#endif
// No matcher is registered specifically for the Mali GPU backend.
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
} // namespace operators
} // namespace paddle_mobile
// Invoke the per-backend USE_OP_* macro for fusion_conv_bn on every backend
// that is enabled at build time.
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_bn);
#endif
// Nothing to declare for the Mali GPU backend.
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_bn);
#endif
#endif
...@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp); ...@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#endif #endif
#endif #endif
...@@ -87,6 +87,12 @@ static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar( ...@@ -87,6 +87,12 @@ static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
new FusionConvBNReluMatcher());
#define FUSION_CONV_BN_RELU_REGISTER
#endif
#endif #endif
} // namespace operators } // namespace operators
...@@ -98,6 +104,7 @@ USE_OP_CPU(fusion_conv_bn_relu); ...@@ -98,6 +104,7 @@ USE_OP_CPU(fusion_conv_bn_relu);
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_bn_relu);
#endif #endif
#endif #endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_CONVBN_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
// Kernel interface of the fused conv + batch-norm operator, parameterized on
// the device type and the element type. Only declarations live here; the
// member functions are defined elsewhere.
template <typename DeviceType, typename T>
class ConvBNKernel : public OpKernelBase<DeviceType, FusionConvBNParam> {
public:
// Executes the kernel with the given (read-only) parameters.
void Compute(const FusionConvBNParam &param) const;
// Set-up step that may modify the parameters; reports a bool status.
bool Init(FusionConvBNParam *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -61,8 +61,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) { ...@@ -61,8 +61,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
param->SetNewBias(new_bias); param->SetNewBias(new_bias);
fpga::quantify_filter(filter); fpga::quantify_filter(filter);
auto filter_ptr = filter->data<float>(); auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs; fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled; convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr; convArgs.filter_address = (void *)filter_ptr;
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP #ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h" #include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "memory/t_malloc.h" #include "fpga/fpga_quantilization.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -27,8 +27,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) { ...@@ -27,8 +27,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
auto input_ptr = input->data<half>(); auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter(); Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output(); Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>(); auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>(); auto bn_mean_ptr = param->InputMean()->data<float>();
...@@ -57,6 +56,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) { ...@@ -57,6 +56,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
} }
param->SetNewScale(new_scale); param->SetNewScale(new_scale);
param->SetNewBias(new_bias); param->SetNewBias(new_bias);
fpga::quantify_filter(filter);
auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs; fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled; convArgs.relu_enabled = relu_enabled;
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP #ifdef FUSION_CONVADDRELU_OP
#include "operators/kernel/conv_add_relu_kernel.h" #include "operators/kernel/conv_add_relu_kernel.h"
#include "common/enforce.h" #include "fpga/fpga_quantilization.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -27,8 +27,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) { ...@@ -27,8 +27,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
auto input_ptr = input->data<half>(); auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter(); Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output(); Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>(); auto out_ptr = out->mutable_data<half>();
...@@ -41,6 +40,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) { ...@@ -41,6 +40,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
bs_ptr[i * 2 + 1] = bias_ptr[i]; bs_ptr[i * 2 + 1] = bias_ptr[i];
} }
fpga::quantify_filter(filter);
auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs; fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled; convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr; convArgs.filter_address = (void *)filter_ptr;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#include "operators/kernel/conv_bn_kernel.h"
#include "fpga/api/fpga_api.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
/**
 * Prepares the FPGA convolution arguments for the fused conv + batch-norm op.
 *
 * The batch-norm statistics are folded into one scale and one bias value per
 * output channel:
 *   new_scale = bn_scale / sqrt(bn_var + epsilon)
 *   new_bias  = bn_bias - bn_mean * new_scale
 * The folded values are stored on the param (SetNewScale / SetNewBias) and,
 * interleaved as [scale0, bias0, scale1, bias1, ...], in a buffer obtained
 * from fpga::fpga_malloc. The filter is passed through fpga::quantify_filter
 * before its data pointer is recorded. The completed fpga::ConvArgs is saved
 * with SetFpgaArgs.
 *
 * The tensors created with `new` and the fpga_malloc buffer are handed over
 * as raw pointers; this function does not release them.
 *
 * @param param operator parameters; updated in place.
 * @return always true.
 */
template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam *param) {
  const bool relu_enabled = false;

  const Tensor *input = param->Input();
  auto input_ptr = input->data<half>();
  Tensor *filter = param->Filter();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();

  auto bn_mean_ptr = param->InputMean()->data<float>();
  auto bn_var_ptr = param->InputVariance()->data<float>();
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();

  // Batch norm is applied to the convolution output, so its per-channel
  // statistics must be sized by the number of output channels
  // (filter->dims()[0], the same value InferShape uses for the output channel
  // dimension) and not by the number of input image channels.
  PADDLE_MOBILE_ENFORCE(filter->dims()[0] == param->InputBias()->dims()[0],
                        "Output channel should be equal to bias number");
  const int channel = static_cast<int>(filter->dims()[0]);

  // Interleaved scale/bias pairs consumed through convArgs.sb_address.
  float *bs_ptr =
      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));

  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
    new_bias_ptr[i] = bn_bias_ptr[i] - bn_mean_ptr[i] * new_scale_ptr[i];
    bs_ptr[i * 2] = new_scale_ptr[i];
    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  // Quantify first: the data pointer is only read after the filter has been
  // processed.
  fpga::quantify_filter(filter);
  auto filter_ptr = filter->data<float>();

  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
}
// Runs the fused conv + batch-norm on the FPGA by passing the ConvArgs stored
// on the param (set via SetFpgaArgs in Init) to fpga::ComputeFpgaConv.
template <>
void ConvBNKernel<FPGA, float>::Compute(const FusionConvBNParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
// Explicit instantiation of the kernel for the FPGA / float combination.
template class ConvBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
/**
 * Prepares the FPGA convolution arguments for the fused
 * conv + batch-norm + relu op.
 *
 * The batch-norm statistics are folded into one scale and one bias value per
 * output channel:
 *   new_scale = bn_scale / sqrt(bn_var + epsilon)
 *   new_bias  = bn_bias - bn_mean * new_scale
 * The folded values are stored on the param (SetNewScale / SetNewBias) and,
 * interleaved as [scale0, bias0, scale1, bias1, ...], in a buffer obtained
 * from fpga::fpga_malloc. The filter is passed through fpga::quantify_filter
 * before its data pointer is recorded. The completed fpga::ConvArgs, with
 * relu_enabled set to true, is saved with SetFpgaArgs.
 *
 * The tensors created with `new` and the fpga_malloc buffer are handed over
 * as raw pointers; this function does not release them.
 *
 * @param param operator parameters; updated in place.
 * @return always true.
 */
template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam *param) {
  const bool relu_enabled = true;

  const Tensor *input = param->Input();
  auto input_ptr = input->data<half>();
  Tensor *filter = param->Filter();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();

  auto bn_mean_ptr = param->InputMean()->data<float>();
  auto bn_var_ptr = param->InputVariance()->data<float>();
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();

  // Batch norm is applied to the convolution output, so its per-channel
  // statistics must be sized by the number of output channels
  // (filter->dims()[0]) and not by the number of input image channels.
  PADDLE_MOBILE_ENFORCE(filter->dims()[0] == param->InputBias()->dims()[0],
                        "Output channel should be equal to bias number");
  const int channel = static_cast<int>(filter->dims()[0]);

  // Interleaved scale/bias pairs consumed through convArgs.sb_address.
  float *bs_ptr =
      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));

  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
    new_bias_ptr[i] = bn_bias_ptr[i] - bn_mean_ptr[i] * new_scale_ptr[i];
    bs_ptr[i * 2] = new_scale_ptr[i];
    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  // Quantify first: the data pointer is only read after the filter has been
  // processed.
  fpga::quantify_filter(filter);
  auto filter_ptr = filter->data<float>();

  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
}
// Runs the fused conv + batch-norm + relu on the FPGA by passing the ConvArgs
// stored on the param (set via SetFpgaArgs in Init) to fpga::ComputeFpgaConv.
template <>
void ConvBNReluKernel<FPGA, float>::Compute(
const FusionConvBNReluParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
// Explicit instantiation of the kernel for the FPGA / float combination.
template class ConvBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -1127,6 +1127,100 @@ class FusionConvAddBNReluParam : public OpParam { ...@@ -1127,6 +1127,100 @@ class FusionConvAddBNReluParam : public OpParam {
}; };
#endif #endif
#ifdef FUSION_CONVBN_OP
/**
 * Parameter bundle of the fused conv + batch-norm operator.
 *
 * The constructor resolves the input, filter and output tensors, the four
 * batch-norm tensors (bias, mean, scale, variance) from the scope, and reads
 * the attributes "axis", "strides", "paddings", "dilations", "groups",
 * "epsilon" and "momentum". The "is_test" attribute is deliberately not read
 * (the assignment is commented out).
 *
 * NewScale()/NewBias() expose tensors that are only available after a kernel
 * has called SetNewScale()/SetNewBias(); until then they return nullptr.
 */
class FusionConvBNParam : public OpParam {
 public:
  FusionConvBNParam(const VariableNameMap &inputs,
                    const VariableNameMap &outputs, const AttributeMap &attrs,
                    const Scope &scope) {
    axis_ = GetAttr<int>("axis", attrs);
    filter_ = FilterFrom<LoDTensor>(inputs, scope);
    input_ = InputFrom<LoDTensor>(inputs, scope);
    output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
    strides_ = GetAttr<vector<int>>("strides", attrs);
    paddings_ = GetAttr<vector<int>>("paddings", attrs);
    dilations_ = GetAttr<vector<int>>("dilations", attrs);
    groups = GetAttr<int>("groups", attrs);
    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
    epsilon_ = GetAttr<float>("epsilon", attrs);
    momentum_ = GetAttr<float>("momentum", attrs);
    //    is_test_ = GetAttr<bool>("is_test", attrs);
  }

  const int &Axis() const { return axis_; }

  const Tensor *Input() const { return input_; }

  // The FPGA build hands out a mutable filter (the FPGA kernels pass it to
  // fpga::quantify_filter); every other build only gets read access.
#ifdef PADDLE_MOBILE_FPGA
  Tensor *Filter() const { return filter_; }
#else
  const Tensor *Filter() const { return filter_; }
#endif

  Tensor *Output() const { return output_y_; }

  const vector<int> &Strides() const { return strides_; }

  const vector<int> &Paddings() const { return paddings_; }

  const vector<int> &Dilations() const { return dilations_; }

  const int &Groups() const { return groups; }

  const Tensor *InputBias() const { return input_bias_; }

  const Tensor *InputMean() const { return input_mean_; }

  const Tensor *InputScale() const { return input_scale_; }

  const Tensor *InputVariance() const { return input_variance_; }

  const float &Epsilon() const { return epsilon_; }

  const float &Momentum() const { return momentum_; }

  const bool &IsTest() const { return is_test_; }

  // Store the folded batch-norm tensors. Only the raw pointer is kept; this
  // class never deletes it.
  void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }

  void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }

  const Tensor *NewScale() const { return new_scale_; }

  const Tensor *NewBias() const { return new_bias_; }

 protected:
  int axis_;
  Tensor *input_;
  Tensor *output_y_;
  Tensor *filter_;
  vector<int> strides_;
  vector<int> paddings_;
  vector<int> dilations_;
  int groups;
  Tensor *input_bias_;
  Tensor *input_mean_;
  Tensor *input_scale_;
  Tensor *input_variance_;
  float epsilon_;
  float momentum_;
  // The members below are not assigned by the constructor. Give them defined
  // defaults so the accessors never read indeterminate values.
  // NOTE(review): `false` is only a deterministic placeholder for is_test_;
  // confirm the intended default if the attribute is ever consumed.
  bool is_test_ = false;
  Tensor *new_bias_ = nullptr;
  Tensor *new_scale_ = nullptr;

#ifdef PADDLE_MOBILE_FPGA

 private:
  fpga::ConvArgs fpga_conv_args;

 public:
  // Convolution arguments prepared by the FPGA kernel's Init step.
  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
#ifdef FUSION_CONVADDBN_OP #ifdef FUSION_CONVADDBN_OP
class FusionConvAddBNParam : public OpParam { class FusionConvAddBNParam : public OpParam {
public: public:
...@@ -1329,7 +1423,11 @@ class FusionConvBNReluParam : public OpParam { ...@@ -1329,7 +1423,11 @@ class FusionConvBNReluParam : public OpParam {
const Tensor *Input() const { return input_; } const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; } const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_; } Tensor *Output() const { return output_; }
...@@ -1380,6 +1478,15 @@ class FusionConvBNReluParam : public OpParam { ...@@ -1380,6 +1478,15 @@ class FusionConvBNReluParam : public OpParam {
bool is_test_; bool is_test_;
Tensor *new_bias_; Tensor *new_bias_;
Tensor *new_scale_; Tensor *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::ConvArgs fpga_conv_args;
public:
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
}; };
#endif #endif
......
...@@ -17,20 +17,27 @@ limitations under the License. */ ...@@ -17,20 +17,27 @@ limitations under the License. */
#include "../test_include.h" #include "../test_include.h"
int main() { int main() {
#ifdef PADDLE_MOBILE_FPGA
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
#endif
#ifdef PADDLE_MOBILE_CPU
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(4); paddle_mobile.SetThreadNum(4);
bool optimize = true; bool optimize = true;
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) { if (paddle_mobile.Load(g_resnet, optimize)) {
auto time2 = time(); auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl; std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims); // GetInput<float>(g_test_image_1x3x224x224, &input, dims);
// 预热一次 // 预热一次
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
} }
auto time4 = time(); auto time4 = time();
......
...@@ -82,7 +82,8 @@ if ("FPGAnets" IN_LIST NET) ...@@ -82,7 +82,8 @@ if ("FPGAnets" IN_LIST NET)
set(CONCAT_OP ON) set(CONCAT_OP ON)
set(SOFTMAX_OP ON) set(SOFTMAX_OP ON)
set(DROPOUT_OP ON) set(DROPOUT_OP ON)
# set(CONV_OP ON) set(FUSION_CONVBNRELU_OP ON)
set(FUSION_CONVBN_OP ON)
set(FOUND_MATCH ON) set(FOUND_MATCH ON)
endif() endif()
...@@ -241,8 +242,8 @@ endif() ...@@ -241,8 +242,8 @@ endif()
if (FUSION_ELEMENTWISEADDRELU_OP) if (FUSION_ELEMENTWISEADDRELU_OP)
add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP) add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
endif() endif()
if (REGION_OP) if (FUSION_CONVBN_OP)
add_definitions(-DREGION_OP) add_definitions(-DFUSION_CONVBN_OP)
endif() endif()
if (CONV_TRANSPOSE_OP) if (CONV_TRANSPOSE_OP)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册