Commit a38ec594 authored by zhangyang

repair bugs in op kernels for FPGA track

Parent 96461030
@@ -24,7 +24,7 @@ template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -61,14 +61,14 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
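Review note: the same fix recurs in every kernel in this commit. On the FPGA track, activations are fp16 ("half") and fpga::format_filter rewrites the filter data as int8, so the old data<float>() calls typed every buffer wrong. Below is a minimal self-contained sketch of the typed-accessor contract the fix restores; ToyTensor is a hypothetical stand-in, not paddle-mobile's real Tensor:

#include <cassert>
#include <cstdint>
#include <typeinfo>
#include <vector>

using half = uint16_t;  // storage-only stand-in for fp16

// Hypothetical mini-Tensor: data<T>() is only valid for the type the
// buffer was allocated with, mirroring the contract this commit restores.
struct ToyTensor {
  const std::type_info *held = nullptr;
  std::vector<uint8_t> buf;

  template <typename T> T *mutable_data(size_t n) {
    held = &typeid(T);
    buf.resize(n * sizeof(T));
    return reinterpret_cast<T *>(buf.data());
  }
  template <typename T> T *data() {
    assert(held && *held == typeid(T) && "tensor read with the wrong type");
    return reinterpret_cast<T *>(buf.data());
  }
};

int main() {
  ToyTensor input;
  input.mutable_data<half>(16);   // FPGA activations are fp16
  half *ok = input.data<half>();  // correct, as in the fixed kernels
  (void)ok;
  // float *bad = input.data<float>();  // the old code path: asserts here
  return 0;
}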
@@ -24,7 +24,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -58,14 +58,14 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
@@ -23,7 +23,7 @@ template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   Tensor *filter = param->Filter();
@@ -40,14 +40,14 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
@@ -24,7 +24,7 @@ template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *filter = param->Filter();
   Tensor *out = param->Output();
@@ -56,14 +56,14 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
@@ -23,7 +23,7 @@ template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -33,7 +33,6 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   const float epsilon = param->Epsilon();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   Tensor *new_scale = new Tensor();
@@ -53,14 +52,14 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
-  auto filter_ptr = filter->data<float>();
+  auto filter_ptr = filter->data<int8_t>();
   int element_num_per_div =
       fpga::get_element_num_per_div(filter, param->Groups());
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
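Review note: in all five conv-fusion kernels the filter pointer is re-fetched as int8_t only after fpga::format_filter(filter, max_value, ...) runs, with max_value from fpga::filter_find_max. That pairing suggests symmetric int8 quantization against the tensor-wide absolute max. The int8 output type is confirmed by this diff; the exact scale and rounding below are assumptions for illustration:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hedged sketch of the quantization step format_filter implies:
// symmetric int8 against the tensor-wide |max|. Rounding mode assumed.
std::vector<int8_t> quantize_filter(const std::vector<float> &w, float max_abs) {
  const float scale = max_abs > 0.0f ? 127.0f / max_abs : 0.0f;
  std::vector<int8_t> q(w.size());
  for (size_t i = 0; i < w.size(); ++i)
    q[i] = static_cast<int8_t>(std::lround(w[i] * scale));
  return q;
}

int main() {
  std::vector<float> w = {0.5f, -1.0f, 0.25f};
  auto q = quantize_filter(w, 1.0f);  // max_abs as filter_find_max would report
  std::printf("%d %d %d\n", q[0], q[1], q[2]);  // 64 -127 32
  return 0;
}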
@@ -25,10 +25,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
   Tensor *input_y = const_cast<Tensor *>(param->InputY());
   Tensor *out = param->Out();
-  auto input_x_ptr = input_x->data<float>();
-  auto input_y_ptr = input_y->data<float>();
+  auto input_x_ptr = input_x->data<half>();
+  auto input_y_ptr = input_y->data<half>();
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
...
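Review note: for reference, the fused semantics this kernel programs into EWAddArgs, written as plain C++. The relu_enabled flag folds the ReLU into the same pass instead of a separate op:

#include <algorithm>
#include <cstdio>

// CPU reference for the fused elementwise-add-relu the FPGA executes.
void ewadd_relu_ref(const float *x, const float *y, float *out, int n,
                    bool relu_enabled) {
  for (int i = 0; i < n; ++i) {
    float v = x[i] + y[i];
    out[i] = relu_enabled ? std::max(v, 0.0f) : v;
  }
}

int main() {
  float x[] = {1.0f, -2.0f}, y[] = {0.5f, 0.5f}, out[2];
  ewadd_relu_ref(x, y, out, 2, true);
  std::printf("%g %g\n", out[0], out[1]);  // 1.5 0
  return 0;
}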
@@ -23,7 +23,7 @@ template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   Tensor *input_y = param->InputY();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
@@ -50,13 +50,13 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
   float max_value = fpga::filter_find_max(input_y);
   fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
+  auto input_y_ptr = input_y->data<int8_t>();
   int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_ofm(out);
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
@@ -22,7 +22,7 @@ template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
   Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   Tensor *input_y = param->InputY();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
@@ -45,17 +45,13 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   int height = input_x->dims()[2];
   int width = input_x->dims()[3];
   int filter_channel = chw / height / width;
   input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
   float max_value = fpga::filter_find_max(input_y);
   fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
+  auto input_y_ptr = input_y->data<int8_t>();
   int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
-  auto out_ptr = out->mutable_data<float>();
+  fpga::format_ofm(out);
+  auto out_ptr = out->mutable_data<half>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
...
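Review note: besides the retyping, FusionFcKernel gets an ordering fix the other kernels already had: in the visible hunk the old code allocated the output with mutable_data<float>() without a preceding fpga::format_ofm(out), so the buffer could be sized for the wrong layout. A hypothetical sketch of why allocation must follow formatting, assuming format_ofm pads the channel dim to a hardware alignment; kAlign and the padding rule are illustrative, not paddle-mobile's actual values:

#include <cstdio>

struct Dims { int n, c, h, w; };
constexpr int kAlign = 16;  // assumed FPGA channel alignment

// Assumed effect of format_ofm on the output dims: pad channels.
Dims format_ofm_dims(Dims d) {
  d.c = (d.c + kAlign - 1) / kAlign * kAlign;
  return d;
}

long half_bytes(Dims d) {
  return 2L * d.n * d.c * d.h * d.w;  // sizeof(half) == 2
}

int main() {
  Dims d{1, 3, 224, 224};
  std::printf("before: %ld bytes, after: %ld bytes\n",
              half_bytes(d), half_bytes(format_ofm_dims(d)));
  // Allocating before formatting would under-size the padded buffer.
  return 0;
}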
@@ -22,10 +22,10 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   Tensor *input = const_cast<Tensor *>(param->Input());
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *output = param->Output();
   fpga::format_ofm(output);
-  auto output_ptr = output->mutable_data<float>();
+  auto output_ptr = output->mutable_data<half>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
...
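Review note: the pool kernel forwards Ksize/Strides/Paddings to the FPGA unchanged. For reference, the standard output-size arithmetic those vectors imply (textbook pooling math with floor division, not code from this file):

#include <cstdio>
#include <vector>

// Per-axis pooled size: out = (in + 2*pad - k) / stride + 1.
int pooled_dim(int in, int k, int stride, int pad) {
  return (in + 2 * pad - k) / stride + 1;
}

int main() {
  std::vector<int> ksize = {2, 2}, strides = {2, 2}, paddings = {0, 0};
  std::printf("%d x %d\n",
              pooled_dim(224, ksize[0], strides[0], paddings[0]),
              pooled_dim(224, ksize[1], strides[1], paddings[1]));  // 112 x 112
  return 0;
}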
@@ -25,19 +25,19 @@ namespace operators {
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   const Tensor *input = param->InputX();
-  auto input_ptr = input->data<float>();
-  auto output = param->Out();
-  auto output_ptr = output->mutable_data<float>();
-  fpga::BypassArgs args;
-  args.convert_type = fpga::DATA_FP16_TO_FP32;
-  args.layout_type = fpga::LAYOUT_NO_CONVERT;
-  args.image.address = (void *)(input_ptr);
-  args.image.height = input->dims()[0];
-  args.image.width = input->dims()[1];
-  args.image.channels = 1;
-  args.output.address = output_ptr;
-  param->SetFpgaArgs(args);
+  if (input->type() == typeid(half)) {
+    auto input_ptr = input->data<half>();
+    auto output_ptr = param->Out();
+    fpga::BypassArgs args;
+    args.convert_type = fpga::DATA_FP16_TO_FP32;
+    args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
+    args.image.address = (void *)(input_ptr);
+    args.image.height = input->dims()[0];
+    args.image.width = input->dims()[1];
+    args.image.channels = 1;
+    args.output.address = output_ptr;
+    param->SetFpgaArgs(args);
+  }
   return true;
 }
...
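Review note: the softmax fix guards the whole setup on input->type() == typeid(half), so the FP16-to-FP32 bypass is only programmed when the input really is fp16, and it switches the layout conversion from LAYOUT_NO_CONVERT to LAYOUT_HWC_TO_CHW. For reference, the numeric half of that bypass is a standard IEEE-754 half-to-single decode (reference code, not paddle-mobile's implementation):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reference fp16 -> fp32 decode: what DATA_FP16_TO_FP32 asks the
// hardware to perform. Handles zeros, subnormals, normals, inf/NaN.
float half_to_float(uint16_t h) {
  uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exp = (h >> 10) & 0x1Fu;
  uint32_t frac = h & 0x3FFu;
  uint32_t bits;
  if (exp == 0) {
    if (frac == 0) {
      bits = sign;                                 // signed zero
    } else {                                       // subnormal: renormalize
      int e = 113;                                 // 127 - 15 + 1
      while ((frac & 0x400u) == 0) { frac <<= 1; --e; }
      bits = sign | (static_cast<uint32_t>(e) << 23) | ((frac & 0x3FFu) << 13);
    }
  } else if (exp == 0x1Fu) {
    bits = sign | 0x7F800000u | (frac << 13);      // inf / NaN
  } else {
    bits = sign | ((exp + 112u) << 23) | (frac << 13);  // normal: rebias
  }
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  std::printf("%g %g %g\n",
              half_to_float(0x3C00),   // 1
              half_to_float(0xC000),   // -2
              half_to_float(0x0001));  // smallest subnormal, 2^-24
  return 0;
}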