Commit 660ee569 authored by: zhangyang

fix bugs

Parent 250969bb
@@ -68,26 +68,26 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
-int ComputeFpgaConv(const struct ConvArgs &args) {
+int ComputeFpgaConv(const struct WrapperConvArgs &args) {
 #ifdef FPGA_TEST_MODE
-  DLOG << " relu_enabled:" << args.relu_enabled
+  /*DLOG << " relu_enabled:" << args.relu_enabled
        << " sb_address:" << args.sb_address
        << " filter_address:" << args.filter_address
        << " filter_num:" << args.filter_num
        << " group_num:" << args.group_num;
   DLOG << " image_address:" << args.image.address
        << " image_scale_address:" << args.image.scale_address
        << " image_channels:" << args.image.channels
        << " image_height:" << args.image.height
        << " image_width:" << args.image.width
        << " pad_height:" << args.image.pad_height
        << " pad_width:" << args.image.pad_width;
   DLOG << " kernel_height:" << args.kernel.height
        << " kernel_width:" << args.kernel.width
        << " stride_h:" << args.kernel.stride_h
        << " stride_w:" << args.kernel.stride_w;
   DLOG << " out_address:" << args.output.address
-       << " out_scale_address:" << args.output.scale_address;
+       << " out_scale_address:" << args.output.scale_address;*/
 #endif
   return do_ioctl(IOCTL_CONFIG_CONV, &args);
@@ -178,16 +178,31 @@ float filter_find_max(framework::Tensor *filter_tensor) {
   auto filter_ptr = filter_tensor->data<float>();
   return filter::find_max(filter_ptr, filter_tensor->numel());
 }
+int get_plit_num(framework::Tensor *filter_tensor) {
+  auto dims = filter_tensor->dims();
+  int chw = dims[1] * dims[2] * dims[3];
+  int num = dims[0];
+  int div_capacity = filter::calc_division_capacity(chw);
+  return filter::calc_split_num(num, div_capacity);
+}
 int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) {
   auto dims = filter_tensor->dims();
-  PADDLE_MOBILE_ENFORCE(dims.size() == 4 || dims.size() == 2,
-                        "Filter order should be 4 or 2");
-  int chw = dims.size() == 4 ? dims[1] * dims[2] * dims[3] : dims[1];
-  int num = dims.size() == 4 ? dims[0] : dims[1];
+  int chw = dims[1] * dims[2] * dims[3];
+  int num = dims[0];
   int div_capacity = filter::calc_division_capacity(chw);
   return filter::calc_num_per_div(num, group_num, div_capacity);
 }
+int get_aligned_filter_element_num(int chw) {
+  return align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
+}
+int get_aligned_filter_num(int num) {
+  return align_to_x(num, FILTER_NUM_ALIGNMENT);
+}
 void format_filter(framework::Tensor *filter_tensor, float max_value,
                    int group_num) {
   auto dims = filter_tensor->dims();
......
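Note: the helpers added above split a large filter bank into hardware-sized divisions. get_plit_num() derives the split count from the per-division capacity that filter::calc_division_capacity() reports for a given chw, and get_aligned_filter_element_num() / get_aligned_filter_num() round counts up to FILTER_ELEMENT_ALIGNMENT / FILTER_NUM_ALIGNMENT. Below is a minimal sketch of that arithmetic, assuming align_to_x() rounds up to a multiple of x and calc_split_num() is a ceiling division; the real implementations live in the filter namespace and are not part of this commit, and the capacity and alignment values used here are purely illustrative.

```cpp
#include <cstdio>

// Assumed behaviour of the helpers referenced by the new api.cpp code.
static int align_to_x(int n, int x) { return (n + x - 1) / x * x; }
static int calc_split_num(int num, int div_capacity) {
  return (num + div_capacity - 1) / div_capacity;  // ceiling division
}

int main() {
  // Hypothetical filter bank: 1000 output channels, 3x3x128 filters.
  int num = 1000;
  int div_capacity = 400;  // placeholder for filter::calc_division_capacity(chw)
  int split_num = calc_split_num(num, div_capacity);      // -> 3 splits
  int last_split = num - (split_num - 1) * div_capacity;  // remainder -> 200
  std::printf("splits=%d, last split holds %d filters (aligned: %d)\n",
              split_num, last_split,
              align_to_x(last_split, 32 /* assumed FILTER_NUM_ALIGNMENT */));
  return 0;
}
```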
@@ -92,6 +92,14 @@ struct ConvArgs {
   struct ImageOutputArgs output;
 };
+struct WrapperConvArgs {
+  uint32_t split_num;
+  uint32_t group_num;
+  uint32_t filter_num;
+  struct ImageOutputArgs output;
+  struct ConvArgs* args;
+};
 struct PoolingArgs {
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;
@@ -165,7 +173,7 @@ enum FPGA_ERR_TYPE {
 //============================== API =============================
 int PerformBypass(const struct BypassArgs& args);
-int ComputeFpgaConv(const struct ConvArgs& args);
+int ComputeFpgaConv(const struct WrapperConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);
@@ -174,6 +182,10 @@ void format_image(framework::Tensor* image_tensor);
 void format_ofm(framework::Tensor* ofm_tensor);  // only allocate memory
 float filter_find_max(framework::Tensor* filter_tensor);
 int get_element_num_per_div(framework::Tensor* filter_tensor, int group_num);
+int get_plit_num(framework::Tensor* filter_tensor);
+int get_aligned_filter_element_num(int chw);
+int get_aligned_filter_num(int num);
 void format_filter(framework::Tensor* filter_tensor, float max_value,
                    int group_num);
 void format_fc_matrix(framework::Tensor* filter_tensor, float max_value,
......
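Note: WrapperConvArgs bundles one ConvArgs per split, so a convolution that no longer fits in a single division is described by an array of per-split jobs plus shared output metadata. The sketch below shows one way a caller could allocate and coarsely initialise the wrapper, mirroring the pattern used by the kernel Init() functions later in this commit; it assumes the declarations above are visible via fpga/api.h inside the paddle_mobile::fpga namespace, and the caller still has to fill each args[i] (addresses, kernel, image) before ComputeFpgaConv() is invoked.

```cpp
#include <cstdint>
#include <cstring>
#include "fpga/api.h"  // WrapperConvArgs, ConvArgs, fpga_malloc (assumed visible here)

namespace fpga = paddle_mobile::fpga;

// Allocate a WrapperConvArgs holding `split_num` zero-initialised ConvArgs.
fpga::WrapperConvArgs MakeWrapper(uint32_t split_num, uint32_t group_num,
                                  uint32_t filter_num, void *out_ptr,
                                  float *out_scale) {
  fpga::WrapperConvArgs w;
  w.split_num = split_num;
  w.group_num = group_num;
  w.filter_num = filter_num;
  w.output.address = out_ptr;
  w.output.scale_address = out_scale;
  w.args = reinterpret_cast<fpga::ConvArgs *>(
      fpga::fpga_malloc(split_num * sizeof(fpga::ConvArgs)));
  std::memset(w.args, 0, split_num * sizeof(fpga::ConvArgs));
  return w;
}
```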
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include <cstdlib>
+#include <string>
 #include "common/enforce.h"
 namespace paddle_mobile {
 namespace framework {
......
@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBN_OP
 #include "operators/kernel/conv_add_bn_kernel.h"
-#include "fpga/api.h"
 namespace paddle_mobile {
 namespace operators {
@@ -23,11 +22,11 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  auto *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
@@ -41,10 +40,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
                         "Output channel should be equal to bias number");
   const int channel = out->dims()[1];
-  float *bs_ptr =
+  auto *bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto *new_scale = new Tensor();
+  auto *new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});
@@ -70,27 +69,42 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
......
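Note: the per-split loop above carves the formatted filter and bias/scale buffers into split_num contiguous slices. Each slice starts i * element_num bytes into the int8 buffers, every split except the last advertises element_num_per_div filters, and the last split takes whatever remains, rounded up by get_aligned_filter_num(). A standalone sketch of that tail computation follows; the names mirror the loop, and get_aligned_filter_num() here is an assumed stand-in that rounds up to an illustrative filter-count alignment.

```cpp
#include <cstdint>

// Assumed stand-in for fpga::get_aligned_filter_num(): round up to a multiple
// of the hardware filter-count alignment (the value 32 is illustrative).
static uint32_t get_aligned_filter_num(uint32_t num) {
  const uint32_t kFilterNumAlignment = 32;
  return (num + kFilterNumAlignment - 1) / kFilterNumAlignment *
         kFilterNumAlignment;
}

// Number of filters advertised by split i out of n, given `channel` output
// channels in total and `element_num_per_div` filters per full division.
static uint32_t SplitFilterNum(int i, int n, int channel,
                               int element_num_per_div) {
  return (uint32_t)(i == n - 1
                        ? get_aligned_filter_num(
                              channel - (n - 1) * element_num_per_div)
                        : element_num_per_div);
}
```

For example, with channel = 1000, element_num_per_div = 384 and n = 3, the first two splits report 384 filters and the last reports get_aligned_filter_num(232) = 256.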
@@ -27,7 +27,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -67,26 +67,43 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
+  return true;
   return true;
 }
......
@@ -26,13 +26,13 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  auto *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = bias_ptr[i];
@@ -49,27 +49,42 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
......
@@ -25,8 +25,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -65,27 +64,42 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
......
@@ -24,7 +24,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -61,26 +61,42 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
......
@@ -22,9 +22,9 @@ template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  Tensor *input_y = const_cast<Tensor *>(param->InputY());
-  Tensor *out = param->Out();
+  auto *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_y = const_cast<Tensor *>(param->InputY());
+  auto *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
   fpga::format_ofm(out);
@@ -34,22 +34,22 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.const0 = 1;
   ewaddArgs.const1 = 1;
-  ewaddArgs.image0.address = (void *)input_x_ptr;
-  ewaddArgs.image0.channels = input_x->dims()[1];
+  ewaddArgs.image0.address = input_x_ptr;
+  ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
   ewaddArgs.image0.scale_address = input_x->scale;
-  ewaddArgs.image0.height = input_x->dims()[2];
-  ewaddArgs.image0.width = input_x->dims()[3];
+  ewaddArgs.image0.height = (uint32_t)input_x->dims()[2];
+  ewaddArgs.image0.width = (uint32_t)input_x->dims()[3];
   ewaddArgs.image0.pad_height = 0;
   ewaddArgs.image0.pad_width = 0;
-  ewaddArgs.image1.address = (void *)input_y_ptr;
-  ewaddArgs.image1.channels = input_y->dims()[1];
+  ewaddArgs.image1.address = input_y_ptr;
+  ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1];
   ewaddArgs.image1.scale_address = input_y->scale;
-  ewaddArgs.image1.height = input_y->dims()[2];
-  ewaddArgs.image1.width = input_y->dims()[3];
+  ewaddArgs.image1.height = (uint32_t)input_y->dims()[2];
+  ewaddArgs.image1.width = (uint32_t)input_y->dims()[3];
   ewaddArgs.image1.pad_height = 0;
   ewaddArgs.image1.pad_width = 0;
   ewaddArgs.output.scale_address = out->scale;
-  ewaddArgs.output.address = (void *)out_ptr;
+  ewaddArgs.output.address = out_ptr;
   param->SetFpgaArgs(ewaddArgs);
   return true;
 }
......
@@ -14,71 +14,82 @@ limitations under the License. */
 #ifdef FUSION_FCRELU_OP
 #include "operators/kernel/fc_relu_kernel.h"
-#include "fpga/api.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<Tensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
+  auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(
       chw == input_x->numel(),
       "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
+  fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
+  fpga::WrapperConvArgs convArgs;
   convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = 1;
+    convArgs.args[i].kernel.stride_h = 1;
+    convArgs.args[i].kernel.stride_w = 1;
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_x_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.args[i].image.pad_height = 0;
+    convArgs.args[i].image.pad_width = 0;
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
 template <>
......
@@ -21,58 +21,76 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<Tensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
+  auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(
       chw == input_x->numel(),
       "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   auto out_ptr = out->mutable_data<float>();
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
+  fpga::WrapperConvArgs convArgs;
   convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
   convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = 1;
+    convArgs.args[i].kernel.stride_h = 1;
+    convArgs.args[i].kernel.stride_w = 1;
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_x_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.args[i].image.pad_height = 0;
+    convArgs.args[i].image.pad_width = 0;
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
......
@@ -21,7 +21,7 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
   fpga::format_ofm(output);
@@ -31,19 +31,19 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   vector<int> paddings = param->Paddings();
   fpga::PoolingArgs poolArgs;
-  poolArgs.image.address = (void *)input_ptr;
-  poolArgs.image.channels = input->dims()[1];
-  poolArgs.image.height = input->dims()[2];
-  poolArgs.image.width = input->dims()[3];
-  poolArgs.image.pad_height = paddings[0];
-  poolArgs.image.pad_width = paddings[1];
+  poolArgs.image.address = input_ptr;
+  poolArgs.image.channels = (uint32_t)input->dims()[1];
+  poolArgs.image.height = (uint32_t)input->dims()[2];
+  poolArgs.image.width = (uint32_t)input->dims()[3];
+  poolArgs.image.pad_height = (uint32_t)paddings[0];
+  poolArgs.image.pad_width = (uint32_t)paddings[1];
   poolArgs.image.scale_address = input->scale;
   poolArgs.output.address = output_ptr;
   poolArgs.output.scale_address = input->scale;
-  poolArgs.kernel.height = ksize[0];
-  poolArgs.kernel.width = ksize[1];
-  poolArgs.kernel.stride_h = strides[0];
-  poolArgs.kernel.stride_w = strides[1];
+  poolArgs.kernel.height = (uint32_t)ksize[0];
+  poolArgs.kernel.width = (uint32_t)ksize[1];
+  poolArgs.kernel.stride_h = (uint32_t)strides[0];
+  poolArgs.kernel.stride_w = (uint32_t)strides[1];
   param->SetFpgaArgs(poolArgs);
   return true;
 }
......
@@ -33,8 +33,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.convert_type = fpga::DATA_FP16_TO_FP32;
   args.layout_type = fpga::LAYOUT_NO_CONVERT;
   args.image.address = (void *)(input_ptr);
-  args.image.height = input->dims()[0];
-  args.image.width = input->dims()[1];
+  args.image.height = (uint32_t)input->dims()[0];
+  args.image.width = (uint32_t)input->dims()[1];
   args.image.channels = 1;
   args.output.address = output_ptr;
   param->SetFpgaArgs(args);
......
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "operators/math/gemm.h"
-#include <string>
+#include <string.h>
 #include "common/log.h"
 #include "memory/t_malloc.h"
 #if __ARM_NEON
......
@@ -56,7 +56,7 @@ struct DtypeTensorTrait<CPU> {
 template <>
 struct DtypeTensorTrait<FPGA> {
   // This is the type we obtained in variable.
-  typedef framework::LoDTensor gtype;
+  typedef framework::Tensor gtype;
   // This type will be the parent class type
   // or the same type.
   typedef framework::Tensor rtype;
@@ -1232,11 +1232,7 @@ class FusionFcParam : public OpParam {
   }
   const GType *InputX() const { return input_x_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *InputY() const { return input_y_; }
-#else
   const RType *InputY() const { return input_y_; }
-#endif
   const RType *InputZ() const { return input_z_; }
@@ -1259,11 +1255,11 @@ class FusionFcParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
@@ -1297,11 +1293,7 @@ class FusionConvAddParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -1326,11 +1318,11 @@ class FusionConvAddParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
@@ -1379,11 +1371,7 @@ class FusionConvAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -1410,11 +1398,11 @@ class FusionConvAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1461,11 +1449,7 @@ class FusionConvAddAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -1496,11 +1480,11 @@ class FusionConvAddAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1538,11 +1522,7 @@ class FusionConvAddBNReluParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -1598,11 +1578,11 @@ class FusionConvAddBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1648,11 +1628,7 @@ class FusionConvBNAddReluParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -1711,11 +1687,11 @@ class FusionConvBNAddReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1748,11 +1724,8 @@ class FusionConvBNParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_y_; }
   const vector<int> &Strides() const { return strides_; }
@@ -1805,11 +1778,11 @@ class FusionConvBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1847,11 +1820,8 @@ class FusionConvAddBNParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_y_; }
   const vector<int> &Strides() const { return strides_; }
@@ -1906,11 +1876,11 @@ class FusionConvAddBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -2027,11 +1997,7 @@ class FusionConvBNReluParam : public OpParam {
   const RType *Input() const { return input_; }
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
   RType *Output() const { return output_; }
@@ -2085,11 +2051,11 @@ class FusionConvBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
......