Commit a38ec594 authored by: Z zhangyang

Repair bugs in op kernels for the FPGA track: request tensor data pointers with the element type the buffers actually hold (float instead of half / int8_t), and rework the softmax kernel's FPGA bypass setup.

Parent 96461030
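
Every hunk below applies the same one-line pattern: request the tensor's data pointer with the element type the buffer actually holds (float) rather than the FPGA on-device types (half for feature maps, int8_t for filters). The sketch below is a minimal, hypothetical model of why the old calls were bugs; the Tensor stub merely stands in for paddle-mobile's framework::Tensor, assuming (as the fix implies) that data<T>() requires T to match the stored element type:

    #include <cassert>
    #include <typeindex>
    #include <vector>

    // Hypothetical stand-in for framework::Tensor, for illustration only.
    struct Tensor {
      std::vector<float> buf;                // host buffer holds float data
      std::type_index held = typeid(float);  // element type it was allocated as
      template <typename T>
      T *data() {
        // Assumption: the real data<T>() enforces a matching element type.
        assert(held == typeid(T) && "requested type must match stored type");
        return reinterpret_cast<T *>(buf.data());
      }
    };

    int main() {
      Tensor input;
      input.buf.assign(8, 1.0f);
      // auto p = input.data<half>();  // old pattern: fails the type check
      auto p = input.data<float>();    // fixed pattern: matches stored type
      return p == nullptr;
    }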
@@ -24,7 +24,7 @@ template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
bool relu_enabled = false;
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
Tensor *filter = param->Filter();
@@ -61,14 +61,14 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
- auto filter_ptr = filter->data<int8_t>();
+ auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -24,7 +24,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true;
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
Tensor *filter = param->Filter();
@@ -58,14 +58,14 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
- auto filter_ptr = filter->data<int8_t>();
+ auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -23,7 +23,7 @@ template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
Tensor *filter = param->Filter();
@@ -40,14 +40,14 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
- auto filter_ptr = filter->data<int8_t>();
+ auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -24,7 +24,7 @@ template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
bool relu_enabled = false;
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
Tensor *filter = param->Filter();
Tensor *out = param->Output();
@@ -56,14 +56,14 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
- auto filter_ptr = filter->data<int8_t>();
+ auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -23,7 +23,7 @@ template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
bool relu_enabled = true;
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -33,7 +33,6 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
"Output channel should be equal to bias number");
const int channel = out->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
Tensor *new_scale = new Tensor();
@@ -53,14 +52,14 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
- auto filter_ptr = filter->data<int8_t>();
+ auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -25,10 +25,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
Tensor *input_x = const_cast<Tensor *>(param->InputX());
Tensor *input_y = const_cast<Tensor *>(param->InputY());
Tensor *out = param->Out();
- auto input_x_ptr = input_x->data<half>();
- auto input_y_ptr = input_y->data<half>();
+ auto input_x_ptr = input_x->data<float>();
+ auto input_y_ptr = input_y->data<float>();
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs;
ewaddArgs.relu_enabled = relu_enabled;
......
@@ -23,7 +23,7 @@ template <>
bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
bool relu_enabled = true;
Tensor *input_x = const_cast<Tensor *>(param->InputX());
- auto input_x_ptr = input_x->data<half>();
+ auto input_x_ptr = input_x->data<float>();
Tensor *input_y = param->InputY();
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
@@ -50,13 +50,13 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(input_y);
fpga::format_filter(input_y, max_value, 1);
- auto input_y_ptr = input_y->data<int8_t>();
+ auto input_y_ptr = input_y->data<float>();
int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -22,7 +22,7 @@ template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
bool relu_enabled = false;
Tensor *input_x = const_cast<Tensor *>(param->InputX());
- auto input_x_ptr = input_x->data<half>();
+ auto input_x_ptr = input_x->data<float>();
Tensor *input_y = param->InputY();
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
@@ -45,17 +45,13 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
int height = input_x->dims()[2];
int width = input_x->dims()[3];
int filter_channel = chw / height / width;
input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(input_y);
fpga::format_filter(input_y, max_value, 1);
- auto input_y_ptr = input_y->data<int8_t>();
+ auto input_y_ptr = input_y->data<float>();
int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
- auto out_ptr = out->mutable_data<half>();
+ auto out_ptr = out->mutable_data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
@@ -22,10 +22,10 @@ namespace operators {
template <>
bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
Tensor *input = const_cast<Tensor *>(param->Input());
- auto input_ptr = input->data<half>();
+ auto input_ptr = input->data<float>();
Tensor *output = param->Output();
fpga::format_ofm(output);
- auto output_ptr = output->mutable_data<half>();
+ auto output_ptr = output->mutable_data<float>();
vector<int> ksize = param->Ksize();
vector<int> strides = param->Strides();
vector<int> paddings = param->Paddings();
......
@@ -25,19 +25,19 @@ namespace operators {
template <>
bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
const Tensor *input = param->InputX();
- if (input->type() == typeid(half)) {
-   auto input_ptr = input->data<half>();
-   auto output_ptr = param->Out();
-   fpga::BypassArgs args;
-   args.convert_type = fpga::DATA_FP16_TO_FP32;
-   args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
-   args.image.address = (void *)(input_ptr);
-   args.image.height = input->dims()[0];
-   args.image.width = input->dims()[1];
-   args.image.channels = 1;
-   args.output.address = output_ptr;
-   param->SetFpgaArgs(args);
- }
+ auto input_ptr = input->data<float>();
+ auto output = param->Out();
+ auto output_ptr = output->mutable_data<float>();
+ fpga::BypassArgs args;
+ args.convert_type = fpga::DATA_FP16_TO_FP32;
+ args.layout_type = fpga::LAYOUT_NO_CONVERT;
+ args.image.address = (void *)(input_ptr);
+ args.image.height = input->dims()[0];
+ args.image.width = input->dims()[1];
+ args.image.channels = 1;
+ args.output.address = output_ptr;
+ param->SetFpgaArgs(args);
return true;
}
......