提交 380c55f2 编写于 作者: qnqinan's avatar qnqinan

Merge remote-tracking branch 'origin/develop' into develop

......@@ -35,7 +35,7 @@ namespace fpga {
// Descriptor used for every driver request; initialised to -1.
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";

// Issues request `req` on the module-level descriptor `fd`, passing `arg` to
// the driver as the request payload.
// Returns the value of ioctl(2): -1 with errno set on failure.
static inline int do_ioctl(int req, const void *arg) {
  // ioctl(2) takes the descriptor first and the request code second; the
  // payload pointer travels through the variadic slot unchanged.
  return ioctl(fd, req, arg);
}
......@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num);
}
// Each entry point below forwards its argument block to the driver through
// do_ioctl() using the matching IOCTL_CONFIG_* request code, and returns
// do_ioctl()'s result unchanged.

// Sends a convolution request.
int ComputeFpgaConv(const struct ConvArgs &args) {
  return do_ioctl(IOCTL_CONFIG_CONV, &args);
}

// Sends a pooling request.
int ComputeFpgaPool(const struct PoolingArgs &args) {
  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
}

// Sends an element-wise add request.
int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
  return do_ioctl(IOCTL_CONFIG_EW, &args);
}

// Sends a bypass request.
int PerformBypass(const struct BypassArgs &args) {
  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
}
} // namespace fpga
......
......@@ -86,12 +86,12 @@ struct ImageOutputArgs {
struct ConvArgs {
bool relu_enabled;
void* bias_address;
void* sb_address; // scale and bias are interlaced;
void* filter_address;
float* filter_scale_address;
uint32_t filter_num;
uint32_t group_num;
void* sb_address; // scale and bias are interlaced;
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
......@@ -116,6 +116,7 @@ struct EWAddArgs {
// Argument block for the bypass request: PerformBypass() passes its address to
// the driver, and IOCTL_CONFIG_BYPASS is encoded with this struct as payload.
// NOTE(review): the member order is presumably part of the driver ABI — confirm
// before reordering.
struct BypassArgs {
// Data conversion selector (values come from DataConvertType).
enum DataConvertType convert_type;
// Layout conversion selector (values come from LayoutConvertType).
enum LayoutConvertType layout_type;
// Source image description.
struct ImageInputArgs image;
// Destination description.
struct ImageOutputArgs output;
};
......@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
uint64_t value;
};
// Payload type named by the IOCTL_FPGA_REG_READ request code.
struct FpgaRegReadArgs {
// Register address.
uint64_t address;
// Register value; presumably filled in by the driver — TODO confirm.
uint64_t value;
};
// Magic value shared by every request code below.
// NOTE(review): 'FPGA' is a multi-character literal, so its value is
// implementation-defined and wider than one byte, while the _IOW family
// conventionally reserves 8 bits for the type field. Confirm the driver is
// built from this same definition before changing it.
#define IOCTL_FPGA_MAGIC 'FPGA'
// Request code carrying a VersionArgs payload (command number 01).
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
......@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
// Request codes for the compute commands. The second _IOW argument is the
// command number and the third is the payload struct.
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
// Register access request codes.
// NOTE(review): the read request is encoded with _IOW just like the write
// request; a read presumably returns data to user space — confirm whether
// _IOR/_IOWR was intended.
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
......@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
//============================== API =============================
// Each function takes a filled argument block by const reference and returns
// an int status.
// Bypass request.
int PerformBypass(const struct BypassArgs& args);
// Convolution request.
int ComputeFpgaConv(const struct ConvArgs& args);
// Pooling request.
int ComputeFpgaPool(const struct PoolingArgs& args);
// Element-wise add request.
int ComputeFpgaEWAdd(const struct EWAddArgs& args);
......
......@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "common/types.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/scope.h"
#include "framework/tensor.h"
namespace paddle_mobile {
// Reports whether `type` names the plain convolution operator or one of the
// fused convolution variants listed below.
//
// Marked inline because the definition lives in a `#pragma once` header: a
// non-inline definition would be emitted by every translation unit that
// includes it and clash at link time.
inline bool is_conv(std::string type) {
  return type == G_OP_TYPE_CONV ||
         type == G_OP_TYPE_FUSION_CONV_ADD ||
         type == G_OP_TYPE_FUSION_CONV_ADD_RELU ||
         type == G_OP_TYPE_FUSION_CONV_BN_RELU ||
         type == G_OP_TYPE_FUSION_CONV_ADD_BN;
}
template <typename Dtype>
void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
std::shared_ptr<framework::Scope> scope) {
if (!is_conv(op.get()->Type())) {
return;
}
framework::Tensor* filter = nullptr;
auto var_vec = op.get()->Inputs().at("Filter");
if (!var_vec.empty()) {
auto var = scope.get()->FindVar(var_vec[0]);
filter = var->template GetMutable<framework::LoDTensor>();
}
// Intended to turn a float filter tensor into a newly allocated int8 tensor
// plus a scale factor, and to return the input unchanged when it does not hold
// floats.
//
// NOTE(review): as written this body cannot compile. The individual problems
// are marked inline below; they need resolving before this function is used.
framework::Tensor* quantilize_filter(framework::Tensor* filter) {
float scale = 0;
// 32bit filter -> 8bit filter;
// NOTE(review): `0f` is not a valid floating literal (needs `0.f` / `0.0f`).
float min = 0f;
float max = 0f;
if (filter->type() == typeid(float)) {
// NOTE(review): `originalFilter` is used here but only declared further down.
float* floatData = originalFilter->data<float>();
// Track the smallest and largest element; both start from 0.
for (int i = 0; i < filter->numel(); ++i) {
min = std::min(min, floatData[i]);
max = std::max(max, floatData[i]);
}
// Largest magnitude representable in a signed 8-bit value: 127.
float fix_range = (float)((1 << (8 - 1)) - 1);
// NOTE(review): only `max` feeds the range and `min` is never read; presumably
// the largest absolute value was intended — confirm.
float float_range = max;
scale = (float_range / fix_range);
framework::Tensor* originalFilter = filter;
framework::Tensor* quantFilter = new framework::Tensor();
// NOTE(review): second declaration of `floatData` in the same scope.
float* floatData = originalFilter->data<float>();
// NOTE(review): the new tensor is given no dimensions before its storage is
// requested — confirm mutable_data() can size the buffer.
int8_t* intData = quantFilter->mutable_data<int8_t>();
}
// NOTE(review): `intData`, `floatData` and `quantFilter` were declared inside
// the `if` block closed just above, so they are out of scope from here on.
for (int i = 0; i < filter->numel(); ++i) {
// NOTE(review): the cast binds tighter than `*`, so the float is truncated to
// int8 before scaling; quantisation presumably needs a division by `scale`
// followed by the cast — confirm.
intData[i] = (int8_t)floatData[i] * scale;
}
// NOTE(review): `quantFilter` is a pointer, so `.scale` is ill-formed; no
// `scale` member on Tensor is visible here.
quantFilter.scale = scale;
// NCHW -> NHWC;
// NOTE(review): no layout conversion is performed despite the comment above.
return quantFilter;
}
// NOTE(review): the brace above already closes the function, leaving the
// fallback return and the final brace outside any function body.
return filter;
}
} // namespace paddle_mobile
......@@ -257,10 +257,10 @@ class Tensor {
// FPGA-specific metadata stored with a tensor.
struct FPGAArgs {
  float scale;
  // Address of `scale`. The accessor is const, so the pointer it hands out is
  // const-qualified too.
  inline const float *scale_pointer() const { return &scale; }
};
// Returns a reference to the tensor's own FPGAArgs rather than a copy, so that
// `fpga_args().scale_pointer()` stays valid for as long as the tensor lives.
// Returning by value would make that pointer refer to a destroyed temporary.
const struct FPGAArgs &fpga_args() const { return fpgaArgs_; }
#endif
private:
......
......@@ -32,10 +32,6 @@ limitations under the License. */
#include "common/threadpool.h"
#endif
#ifdef PADDLE_MOBILE_FPGA
#include "fpga/fpga_quantilization.h"
#endif
namespace paddle_mobile {
using framework::Variable;
......@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (const auto &op : ops) {
op->Init();
}
#ifdef PADDLE_MOBILE_FPGA
for (const auto &op : ops) {
quantilize_op(op, program_.scope);
}
#endif
}
template <typename Dtype, Precision P>
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/fusion_conv_add_bn_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
// Computes the output tensor shape from the input and filter dimensions.
//
// The first two output dims are the input's dim 0 and the filter's dim 0.
// Each further dim comes from math::ConvOutputSize, using the matching
// input/filter extent together with that axis' dilation, padding and stride.
template <typename Dtype, typename T>
void FusionConvAddBNOp<Dtype, T>::InferShape() const {
  const auto &in_dims = this->param_.Input()->dims();
  const auto &filter_dims = this->param_.Filter()->dims();
  // Bind by const reference: works whether the getters return a reference or
  // a temporary, and avoids copying the vectors.
  const std::vector<int> &strides = this->param_.Strides();
  const std::vector<int> &paddings = this->param_.Paddings();
  const std::vector<int> &dilations = this->param_.Dilations();

  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
                         dilations.size() == paddings.size() &&
                         paddings.size() == strides.size()),
                        "ConvParam is not suitable");

  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  for (size_t i = 0; i < strides.size(); ++i) {
    // Spatial axes start at index 2 in both the input and the filter dims.
    output_shape.push_back(
        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
                             paddings[i], strides[i]));
  }

  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
// Shorthand for the operators namespace used by the registration macros below.
namespace ops = paddle_mobile::operators;
// Register the operator under the name fusion_conv_add_bn for each backend
// that is enabled at build time.
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_bn, ops::FusionConvAddBNOp);
#endif
// Nothing is registered for the Mali GPU backend.
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn, ops::FusionConvAddBNOp);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_kernel.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
// Fusion matcher whose pattern is built from three nodes: conv, elementwise_add
// and batch_norm. Its fused type is G_OP_TYPE_FUSION_CONV_ADD_BN.
class FusionConvAddBNMatcher : public framework::FusionOpMatcher {
public:
// Builds the pattern: the root node is the conv op, and the add and batch-norm
// nodes are attached through framework::Node's overloaded operator>.
FusionConvAddBNMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
}
// Asks `node` to fold itself to the pattern's depth under this matcher's
// Type(). The nested initializer names, per folded op type, pairs of input
// keys: "Y" for the add op, and Scale/Mean/Bias/Variance for batch-norm.
// `removed_nodes` is passed straight through to Node::Folder.
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
{G_OP_TYPE_BATCHNORM,
{{"Scale", "Scale"},
{"Mean", "Mean"},
{"Bias", "Bias"},
{"Variance", "Variance"}}}},
removed_nodes);
}
// Type name of the fused operator.
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_BN; }
};
// Operator for the fused conv + elementwise_add + batch_norm node.
//
// The class is a thin shell over framework::OperatorWithKernel, specialised
// with FusionConvAddBNParam and ConvAddBNKernel. It forwards its constructor
// arguments to that base and declares the shape-inference override.
template <typename DeviceType, typename T>
class FusionConvAddBNOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionConvAddBNParam,
          operators::ConvAddBNKernel<DeviceType, T>> {
  // Private shorthand for the base class, used by the constructor.
  using BaseOp = framework::OperatorWithKernel<
      DeviceType, FusionConvAddBNParam,
      operators::ConvAddBNKernel<DeviceType, T>>;

 public:
  // Passes every argument straight to the base-class constructor.
  FusionConvAddBNOp(const string &type, const VariableNameMap &inputs,
                    const VariableNameMap &outputs,
                    const framework::AttributeMap &attrs,
                    std::shared_ptr<framework::Scope> scope)
      : BaseOp(type, inputs, outputs, attrs, scope) {}

  // Shape inference for the fused operator; defined out of line.
  void InferShape() const override;
};
// Defines the fusion registrar for the conv + add + batch_norm pattern when at
// least one of the CPU, Mali GPU or FPGA backends is enabled. The
// FUSION_CONV_ADD_BN_REGISTER macro keeps the definition from being emitted
// twice when several backends are enabled together.
#if defined(PADDLE_MOBILE_CPU) || defined(PADDLE_MOBILE_MALI_GPU) || \
    defined(PADDLE_MOBILE_FPGA)
#ifndef FUSION_CONV_ADD_BN_REGISTER
#define FUSION_CONV_ADD_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
    new FusionConvAddBNMatcher());
#endif
#endif
} // namespace operators
} // namespace paddle_mobile
// Reference the fusion_conv_add_bn operator for each backend enabled at build
// time, via the USE_OP_* macros.
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_add_bn);
#endif
// Nothing is referenced for the Mali GPU backend.
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_bn);
#endif
#endif
......@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#endif
......@@ -96,6 +96,13 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
#endif
// FPGA build: define the fusion registrar for the conv + add + bn + relu
// matcher, unless FUSION_CONV_ADD_BN_RELU_REGISTER shows it was already
// defined.
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
new FusionConvAddBNReluMatcher());
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
} // namespace operators
......@@ -107,6 +114,7 @@ USE_OP_CPU(fusion_conv_add_bn_relu);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_bn_relu);
#endif
#endif
......@@ -54,6 +54,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#endif
#endif
......@@ -75,6 +75,13 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
// FPGA build: define the fusion registrar for the conv + add + relu matcher,
// unless CONV_ADD_RELU_REGISTER shows it was already defined.
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_RELU_REGISTER
#define CONV_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
new FusionConvAddReluOpMatcher());
#endif
#endif
} // namespace operators
......@@ -86,6 +93,7 @@ USE_OP_CPU(fusion_conv_add_relu);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_add_relu);
#endif
#endif
......@@ -19,6 +19,40 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
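// 1. If both x and y are 2-D,
// x = [[1,2], y = [[5,6],
// [3,4]] [7,8]]
// the result is the ordinary matrix product: out =
// [[1*5+2*7,1*6+2*8],[3*5+4*7, 3*6+4*8]]
//
// 2. If x or y has more than 2 dimensions, e.g. x has dims (2,3,4) and y has dims (4,1,2):
// x = [[[1,2,3,4],
// [2,3,4,5],
// [3,4,5,6]],
// [[1,2,3,4],
// [2,3,4,5],
// [3,4,5,6]]]
// y = [[[1,2]],
// [[3,4]],
// [[5,6]],
// [[7,8]]]
// x_num_col_dims and y_num_col_dims are needed to flatten x and y to 2-D.
// Read from the model: x_num_col_dims = 2, y_num_col_dims = 1; index ranges below are left-closed, right-open.
// (1) For x = (2,3,4): multiply the dims at index [0,x_num_col_dims), i.e. 2 and 3, to get 6;
// multiply the dims at index [x_num_col_dims,xdim.size()), i.e. 4, to get 4;
// rewrite the dims of Tensor x as (6,4).
// (2) For y = (4,1,2): multiply the dims at index [0,y_num_col_dims), i.e. 4, to get 4;
// multiply the dims at index [y_num_col_dims,ydim.size()), i.e. 1 and 2, to get 2;
// rewrite the dims of Tensor y as (4,2).
// This does not change how x and y are laid out in memory.
// x = [[1,2,3,4], y = [[1,2],
// [2,3,4,5], [3,4],
// [3,4,5,6], matrix multiply [5,6],
// [1,2,3,4], [7,8]]
// [2,3,4,5],
// [3,4,5,6]]
// Result: x (6 rows, 4 columns) times y (4 rows, 2 columns), multiplied as in case 1, gives out (6 rows, 2 columns).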
template <typename P>
void MulCompute(const MulParam &param) {
const Tensor *input_x = param.InputX();
......
......@@ -12,21 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include "operators/kernel/conv_kernel.h"
#ifdef FUSION_CONVADDBN_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
return true;
}
using framework::DDim;
using framework::OpKernelBase;
template <>
void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {}
template class ConvKernel<FPGA, float>;
// Kernel interface for the fused conv + add + batch-norm operator, built on
// OpKernelBase with FusionConvAddBNParam as its parameter type. Only the
// declarations live here; definitions are supplied elsewhere.
template <typename DeviceType, typename T>
class ConvAddBNKernel : public OpKernelBase<DeviceType, FusionConvAddBNParam> {
public:
// Runs the kernel using the given parameters.
void Compute(const FusionConvAddBNParam &param) const;
// Setup step that receives mutable parameters; returns a bool, presumably a
// success flag — TODO confirm how callers use it.
bool Init(FusionConvAddBNParam *param);
};
} // namespace operators
} // namespace paddle_mobile
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_kernel.h"
#include "fpga/api/fpga_api.h"
namespace paddle_mobile {
namespace operators {
// Prepares the FPGA convolution descriptor for the fused conv + add +
// batch-norm operator (ReLU disabled).
//
// The element-wise bias and the batch-norm statistics are folded into one
// scale and one bias per output channel:
//   new_scale[c] = bn_scale[c] / sqrt(bn_var[c] + epsilon)
//   new_bias[c]  = bn_bias[c] + (bias[c] - bn_mean[c]) * new_scale[c]
// The pairs are written interleaved (scale, bias, scale, bias, ...) into a
// buffer obtained from fpga::fpga_malloc, and are also handed to `param` as
// two newly allocated tensors.
//
// Returns true once `param` holds the filled fpga::ConvArgs.
template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
  bool relu_enabled = false;
  const Tensor *input = param->Input();
  auto input_ptr = input->data<float>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  const Tensor *filter = param->Filter();
  auto filter_ptr = filter->data<float>();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<float>();
  auto bn_mean_ptr = param->InputMean()->data<float>();
  auto bn_var_ptr = param->InputVariance()->data<float>();
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();

  // Scale and bias apply to the convolution result, so they are indexed by
  // output channel (output dim 1), not by input-image channel. Using the
  // input channel count mis-sizes the buffers whenever the two differ.
  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                            bias->dims()[0] == param->InputBias()->dims()[0],
                        "Output channel should be equal to bias number");
  const int channel = out->dims()[1];

  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  PADDLE_MOBILE_ENFORCE(bs_ptr != nullptr,
                        "fpga_malloc failed for the scale/bias buffer");

  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});

  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
    new_bias_ptr[i] =
        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
    // Interleaved layout: even slots hold the scale, odd slots the bias.
    bs_ptr[i * 2] = new_scale_ptr[i];
    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  // Value-initialise so that fields not assigned below reach the driver as
  // zero instead of indeterminate stack contents.
  fpga::ConvArgs convArgs = {};
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  // NOTE(review): these pointers are only valid if Tensor::fpga_args() returns
  // a reference to tensor-owned storage; if it returns FPGAArgs by value they
  // point into a destroyed temporary — confirm.
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
}
// Executes the fused convolution by handing the fpga::ConvArgs stored in
// `param` to fpga::ComputeFpgaConv. The returned status is not inspected.
template <>
void ConvAddBNKernel<FPGA, float>::Compute(
    const FusionConvAddBNParam &param) const {
  const auto &conv_args = param.FpgaArgs();
  fpga::ComputeFpgaConv(conv_args);
}
template class ConvAddBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "memory/t_malloc.h"
namespace paddle_mobile {
namespace operators {
// Prepares the FPGA convolution descriptor for the fused conv + add +
// batch-norm + ReLU operator (ReLU enabled).
//
// The element-wise bias and the batch-norm statistics are folded into one
// scale and one bias per output channel:
//   new_scale[c] = bn_scale[c] / sqrt(bn_var[c] + epsilon)
//   new_bias[c]  = bn_bias[c] + (bias[c] - bn_mean[c]) * new_scale[c]
// The pairs are written interleaved (scale, bias, scale, bias, ...) into a
// buffer obtained from fpga::fpga_malloc, and are also handed to `param` as
// two newly allocated tensors.
//
// Returns true once `param` holds the filled fpga::ConvArgs.
template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
  bool relu_enabled = true;
  const Tensor *input = param->Input();
  auto input_ptr = input->data<float>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  const Tensor *filter = param->Filter();
  auto filter_ptr = filter->data<float>();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<float>();
  auto bn_mean_ptr = param->InputMean()->data<float>();
  auto bn_var_ptr = param->InputVariance()->data<float>();
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();

  // Scale and bias apply to the convolution result, so they are indexed by
  // output channel (output dim 1), not by input-image channel. Using the
  // input channel count mis-sizes the buffers whenever the two differ.
  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                            bias->dims()[0] == param->InputBias()->dims()[0],
                        "Output channel should be equal to bias number");
  const int channel = out->dims()[1];

  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  PADDLE_MOBILE_ENFORCE(bs_ptr != nullptr,
                        "fpga_malloc failed for the scale/bias buffer");

  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});

  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
    new_bias_ptr[i] =
        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
    // Interleaved layout: even slots hold the scale, odd slots the bias.
    bs_ptr[i * 2] = new_scale_ptr[i];
    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  // Value-initialise so that fields not assigned below reach the driver as
  // zero instead of indeterminate stack contents.
  fpga::ConvArgs convArgs = {};
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  // NOTE(review): these pointers are only valid if Tensor::fpga_args() returns
  // a reference to tensor-owned storage; if it returns FPGAArgs by value they
  // point into a destroyed temporary — confirm.
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
}
// Executes the fused convolution by handing the fpga::ConvArgs stored in
// `param` to fpga::ComputeFpgaConv. The returned status is not inspected.
template <>
void ConvAddBNReluKernel<FPGA, float>::Compute(
    const FusionConvAddBNReluParam &param) const {
  const auto &conv_args = param.FpgaArgs();
  fpga::ComputeFpgaConv(conv_args);
}
template class ConvAddBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP
#include "operators/kernel/conv_add_relu_kernel.h"
#include "common/enforce.h"
namespace paddle_mobile {
namespace operators {
// Prepares the FPGA convolution descriptor for the fused conv + add + ReLU
// operator (ReLU enabled).
//
// There is no batch-norm here, so each output channel gets a scale of 1 and
// the element-wise bias as its bias. The pairs are written interleaved
// (scale, bias, scale, bias, ...) into a buffer from fpga::fpga_malloc.
//
// Returns true once `param` holds the filled fpga::ConvArgs.
template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
  bool relu_enabled = true;
  const Tensor *input = param->Input();
  auto input_ptr = input->data<float>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  const Tensor *filter = param->Filter();
  auto filter_ptr = filter->data<float>();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<float>();

  // The bias is added to the convolution result, so it is indexed by output
  // channel (output dim 1), not by input-image channel. Using the input
  // channel count mis-sizes the buffer whenever the two differ.
  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                        "Output channel should be equal to bias number");
  int channel = out->dims()[1];

  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  PADDLE_MOBILE_ENFORCE(bs_ptr != nullptr,
                        "fpga_malloc failed for the scale/bias buffer");
  for (int i = 0; i < channel; i++) {
    // Interleaved layout: even slots hold the scale, odd slots the bias.
    bs_ptr[i * 2] = 1;
    bs_ptr[i * 2 + 1] = bias_ptr[i];
  }

  // Value-initialise so that fields not assigned below reach the driver as
  // zero instead of indeterminate stack contents.
  fpga::ConvArgs convArgs = {};
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  // NOTE(review): these pointers are only valid if Tensor::fpga_args() returns
  // a reference to tensor-owned storage; if it returns FPGAArgs by value they
  // point into a destroyed temporary — confirm.
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
}
// Executes the fused convolution by handing the fpga::ConvArgs stored in
// `param` to fpga::ComputeFpgaConv. The returned status is not inspected.
template <>
void ConvAddReluKernel<FPGA, float>::Compute(
    const FusionConvAddReluParam &param) const {
  const auto &conv_args = param.FpgaArgs();
  fpga::ComputeFpgaConv(conv_args);
}
template class ConvAddReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -1136,7 +1136,7 @@ class FusionConvAddBNParam : public OpParam {
const Tensor *Filter() const { return filter_; }
Tensor *OutputY() const { return output_y_; }
Tensor *Output() const { return output_y_; }
const vector<int> &Strides() const { return strides_; }
......
......@@ -21,6 +21,7 @@ elseif("resnet" IN_LIST NET)
# gen test
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile)
elseif("FPGAnets" IN_LIST NET)
else ()
# gen test
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册