Commit 77e69801 authored by zhangyang0701, committed by GitHub

Merge pull request #1423 from qnqinan/develop

add activation in FPGA track, fixed #1422
@@ -346,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
   auto filter_pad_width_mul_channel =
       args.image.pad_width * args.image.channels;
   auto image_amount_per_row_multi_win_first =
-      image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
+      image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
   auto image_amount_per_row_multi_win =
-      image_amount_per_row * (4 * args.kernel.stride_h);
+      image_amount_per_row * (2 * args.kernel.stride_h);
   auto image_block_num = block_num;
   auto image_block_len =
@@ -375,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
       (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
           ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
           : 0;
-  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  auto cmd = 0UL | USE_BIAS;
   auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
                       ((args.deconv_tx_param.sub_conv_num) << 16) |
@@ -413,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
 void expand_EW_arg(EWAddArgs *arg) {
   EWAddArgs args = *arg;
-  uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  // uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  uint64_t cmd = 0;
   uint64_t datalen = (uint64_t)args.image0.width *
                      (uint64_t)args.image0.height *
                      (uint64_t)args.image0.channels;
@@ -441,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float *bs_ptr) {
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
   auto out_ptr = out->data<float>();
@@ -488,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                                        filter->dims()[3]));
   for (int i = 0; i < n; i++) {
-    arg->conv_arg[i].relu_enabled = relu_enabled;
+    // arg->conv_arg[i].relu_enabled = relu_enabled;
+    arg->conv_arg[i].output.activation.activation_type = activation_enable;
+    arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->conv_arg[i].group_num = (uint32_t)group_num;
     arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
     arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
@@ -560,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w,
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
                      float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
@@ -687,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
     }
     for (int j = 0; j < split_num; ++j) {
-      arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
+          activation_enable;
+      arg->split_conv_args[i]
+          ->conv_arg[j]
+          .output.activation.leaky_relu_negative_slope =
+          leaky_relu_negative_slope;
       arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
       arg->split_conv_args[i]->conv_arg[j].kernel.width =
@@ -800,13 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float *bias_ptr) {
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
   arg->sub_conv_num = 1;
-  arg->relu_enabled = relu_enabled;
+  // arg->relu_enabled = relu_enabled;
+  arg->output.activation.activation_type = activation_enable;
+  arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   arg->bias_address = bias_ptr;
   arg->filter_address = filter_ptr;
   arg->kernel.height = (uint32_t)filter->dims()[2];
@@ -826,8 +844,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
 void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                        framework::Tensor *out, framework::Tensor *filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float *bias_ptr) {
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
@@ -884,7 +904,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
     arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
     arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
-    arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    // arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
+    arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->dw_conv_args[i]->bias_address = bias_ptr;
     arg->dw_conv_args[i]->filter_address =
......
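Note on the command-word change above: with this patch, activation no longer rides along in the PE command word. `expand_conv_arg` and `expand_EW_arg` now build `cmd` without `USE_RELU`; the requested activation travels in `args.output.activation` and is programmed through the dedicated register introduced in the driver changes below. A condensed before/after view (a sketch; `USE_RELU` and `USE_BIAS` are masks defined elsewhere in this file):

    // Before: uint64_t cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
    // After: the activation mode is written to
    // REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR instead, so the command word
    // only requests bias.
    uint64_t cmd = 0UL | USE_BIAS;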
@@ -47,20 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
 void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                     framework::Tensor* out, framework::Tensor* filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float* bs_ptr);
 void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
+                     float* bs_ptr);
 void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float* bias_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float* bias_ptr);
 void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
                        framework::Tensor* out, framework::Tensor* filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float* bs_ptr);
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float* bs_ptr);
 void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
                           int group_num, int stride);
......
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "fpga/V1/filter.h"
 // #include "filter.h"
 #include "fpga/V1/api.h"
-// #include "fpga_api.h"
 namespace paddle_mobile {
 namespace fpga {
......
@@ -63,6 +63,7 @@ using namespace std;  // NOLINT
 #define REG_TIMER_COUNTER 0x070
 #define REG_SCALE_PARAMETER 0x080
+#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
 #define REG_FLASH_CMD 0x200
 #define REG_FLASH_DATA 0x208
@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
 int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "======Compute Basic Conv======";
-  DLOG << "   relu_enabled:" << args.relu_enabled
-       << "   sb_address:" << args.sb_address
+  // DLOG << "   relu_enabled:" << args.relu_enabled
+  DLOG << "   sb_address:" << args.sb_address
        << "   filter_address:" << args.filter_address
        << "   filter_num:" << args.filter_num
        << "   group_num:" << args.group_num;
@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function: {none, leakyrelu, sigmoid, tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << "   activation_type:" << active_args.activation_type
+       << "   leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "   reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
     ret = -EIO;
@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(
       ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
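The packed word written to REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR carries the activation type in the upper 32 bits and the fp16-encoded negative slope in the low bits, mirroring the `reg_ActivationArgs` expression above. A self-contained sketch of that packing (the helper name is illustrative, not part of the patch; casting through `uint16_t` keeps a negative slope bit pattern from sign-extending into the type field):

    #include <cstdint>

    // Pack activation type (upper 32 bits) and fp16 slope bits (low 16 bits).
    uint64_t pack_activation(uint32_t activation_type, int16_t negative_slope) {
      return (static_cast<uint64_t>(activation_type) << 32) |
             static_cast<uint16_t>(negative_slope);
    }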
@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function: {none, leakyrelu, sigmoid, tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << "   activation_type:" << active_args.activation_type
+       << "   leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "   reg_ActivationArgs:" << reg_ActivationArgs;
   image_physical_address = vaddr_to_paddr_driver(args.image.address);
   output_physical_address = vaddr_to_paddr_driver(args.output.address);
   uint32_t output_height = (uint32_t)(
@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaEWAdd===========";
-  DLOG << "   relu_enabled:" << args.relu_enabled
-       << "   const0:" << fp16_2_fp32(int16_t(args.const0))
+  // DLOG << "   relu_enabled:" << args.relu_enabled
+  DLOG << "   const0:" << fp16_2_fp32(int16_t(args.const0))
        << "   const1:" << fp16_2_fp32(int16_t(args.const1));
   DLOG << "   image0_address:" << args.image0.address
        << "   image0_scale_address:" << args.image0.scale_address
@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << "   activation_type:" << active_args.activation_type
+       << "   leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "   reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
     ret = -EIO;
@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
   reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
   uint8_t data_cell_in = 0;
   uint8_t data_cell_out = 0;
   int ret = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
   datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
             (uint64_t)args.image.channels;
   datalen = align_to_x(datalen, 16);
@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
       (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
     return -EFAULT;
   }
-
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
     ret = -EIO;
@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
   reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
@@ -585,6 +666,7 @@ int PerformBypass(const struct BypassArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
@@ -808,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
 int ComputeDWConv(const struct DWconvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeDWConv===========";
-  DLOG << "   mode:" << args.relu_enabled;
+  // DLOG << "   mode:" << args.relu_enabled;
   DLOG << "   image_address:" << args.image.address
        << "   image_scale_address:" << args.image.scale_address
        << "   image_channels:" << args.image.channels
@@ -831,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
   uint64_t output_scale = 0;
   uint64_t timer_cnt = 0;
   int ret = 0;
-  uint64_t cmd = args.relu_enabled;
+  // uint64_t cmd = args.relu_enabled;
+  uint64_t cmd = 0;
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
   uint64_t filter_physical_address = 0;
......
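Each compute path above ends by setting `active_args.activation_type = NONE` and rewriting the activation register so one op's activation does not leak into the next; note that, as written, the previously packed `reg_ActivationArgs` value is reused rather than repacked. An illustrative reset that recomputes the word first (a sketch, not part of the patch):

    // Repack with NONE so the register write actually clears the mode.
    void reset_activation() {
      uint64_t cleared = static_cast<uint64_t>(paddle_mobile::fpga::NONE) << 32;
      reg_writeq(cleared, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
    }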
@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
   unsigned int nr = (unsigned int)_nr;
   int ret = 0;
   uint64_t a_size = FPGA_PAGE_SIZE * nr;
-  DLOG << a_size;
   pthread_mutex_lock(&memory->mutex);
@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
 void fpga_copy_driver(void *dest, const void *src, size_t num) {
   uint64_t i;
-  DLOG << "dest:" << dest << " src:" << src << " size:" << num;
   for (i = 0; i < num; i++) {
     *((int8_t *)dest + i) = *((int8_t *)src + i);  // NOLINT
   }
......
@@ -29,7 +29,7 @@ namespace driver {
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
-#define FPGA_REG_PHY_ADDR 0xa0000000
+#define FPGA_REG_PHY_ADDR 0x80000000
 #define FPGA_REG_SIZE 0x1000
 #define FPGA_MEM_PHY_ADDR 0x40000000
 #define FPGA_MEM_SIZE 0x80000000
......
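The register window moves from physical 0xa0000000 to 0x80000000, keeping the same 0x1000 (4 KiB) size. For orientation, a generic way such a window is mapped into user space (a hedged sketch using plain POSIX mmap on /dev/mem; the driver's actual device node and helpers may differ):

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    // Map the 4 KiB FPGA register window at its new physical base address.
    volatile uint64_t *map_fpga_regs() {
      int fd = open("/dev/mem", O_RDWR | O_SYNC);
      if (fd < 0) return nullptr;
      void *p = mmap(nullptr, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                     0x80000000);  // FPGA_REG_PHY_ADDR
      close(fd);
      return p == MAP_FAILED ? nullptr : static_cast<volatile uint64_t *>(p);
    }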
@@ -45,6 +45,7 @@ enum ActivationType {
   LEAKYRELU = 1,
   SIGMOID = 2,
   TANH = 3,
+  SOFTMAX = 4,
 };
 struct ActivationArgs {
@@ -132,7 +133,7 @@ struct DeconvTxParm {
 #endif
 struct ConvArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   void* sb_address;  // scale and bias
   void* filter_address;
   float* filter_scale_address;
@@ -198,7 +199,7 @@ struct PoolingArgs {
 };
 struct EWAddArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
   uint32_t const1;
   struct ImageInputArgs image0;
@@ -230,7 +231,7 @@ struct DeconvArgs {
 };
 struct DWconvArgs {
   uint32_t sub_conv_num;
-  bool relu_enabled;
+  // bool relu_enabled;
   void* bias_address;
   void* filter_address;
   struct KernelArgs kernel;
......
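With `relu_enabled` retired from these structs, the activation request now travels with the output descriptor of each args struct. A minimal sketch of the new pattern, assuming the `output.activation` field layout used throughout this patch:

    // Replaces the old `ew.relu_enabled = true;`
    paddle_mobile::fpga::EWAddArgs ew = {0};
    ew.output.activation.activation_type = paddle_mobile::fpga::LEAKYRELU;
    ew.output.activation.leaky_relu_negative_slope = 0;  // fp16 bits; 0 => plain ReLU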
@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
 #ifdef SIGMOID_OP
 DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
+#endif
 #endif  // SIGMOID_OP
 #ifdef TANH_OP
......
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
......
@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
-                          strides[0], strides[1], paddings[0], paddings[1],
-                          new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, strides[0], strides[1],
+                          paddings[0], paddings[1], new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                         param->Groups(), strides[0], strides[1], paddings[0],
-                         paddings[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(), strides[0],
+                         strides[1], paddings[0], paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
   }
   return true;
......
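The kernels that previously set `relu_enabled = true` now request `LEAKYRELU` with a zero negative slope, which is mathematically the same as plain ReLU. A host-side reference of the function being requested (a sketch for clarity, not code from the patch):

    // LeakyReLU with slope s; s == 0 reduces to the standard ReLU of the old
    // relu_enabled path.
    float leaky_relu(float x, float s) { return x >= 0.0f ? x : s * x; }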
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
......
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
......
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
......
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
......
@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -53,17 +56,18 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
-                            param->Strides()[0], param->Strides()[1],
-                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
......
@@ -24,7 +24,10 @@ namespace operators {
 template <>
 bool DeconvAddReluKernel<FPGA, float>::Init(
     FusionDeconvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -54,17 +57,18 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
-                            param->Strides()[0], param->Strides()[1],
-                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
   return true;
......
@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
......
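`const0`/`const1` stay at `0x3c00`, the IEEE-754 half-precision bit pattern for 1.0 (sign 0, biased exponent 01111, mantissa 0), so the engine computes `output = 1*input0 + 1*input1`. A sketch of that encoding:

    #include <cstdint>

    // Compose the fp16 bit pattern for 1.0: sign=0, exponent=15 (bias 15 => 2^0),
    // mantissa=0.
    uint16_t fp16_one() {
      return (0u << 15) | (15u << 10) | 0u;  // == 0x3c00
    }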
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
......
@@ -19,12 +19,34 @@ namespace operators {
 template <>
 bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  // fpga::format_fp16_ofm(output);
   return true;
 }
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   param.Out()->ShareDataWith(*(param.InputX()));
+  /*auto input =
+      reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
+  fpga::format_image(input);
+  auto input_ptr = input->data<float>();
+  Tensor *output = param.Out();
+  auto output_ptr = output->data<float>();
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.input_layout_type = fpga::LAYOUT_CHW;
+  args.output_layout_type = fpga::LAYOUT_HWC;
+  args.image.address = reinterpret_cast<void *>(input_ptr);
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.image.height =
+      (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
+  args.image.width =
+      (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
+  args.image.pad_height = 0;
+  args.image.pad_width = 0;
+  args.output.address = output_ptr;
+  args.output.scale_address = output->scale;
+  fpga::PerformBypass(args);*/
 }
 template class FetchKernel<FPGA, float>;
......
@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
......
@@ -22,6 +22,12 @@ namespace operators {
 template <>
 bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
   param->Out()->ShareDataWith(*param->InputX());
+  const int in_n = param->InputX()->dims()[0];
+  const int in_c = param->InputX()->dims()[1];
+  const int in_h = param->InputX()->dims()[2];
+  const int in_w = param->InputX()->dims()[3];
+  auto out = param->Out();
+  out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
   return true;
 }
......
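The reshape path now pins the FPGA output shape to {N, C*H*W}. A worked example with hypothetical dims: an input of {1, 24, 7, 7} resizes to {1, 1176}:

    // Hypothetical input dims, flattened as in ReshapeKernel::Init above.
    const int in_n = 1, in_c = 24, in_h = 7, in_w = 7;
    const int flattened = in_c * in_h * in_w;  // 24 * 7 * 7 = 1176 -> {1, 1176}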
@@ -15,73 +15,41 @@ limitations under the License. */
 #ifdef SIGMOID_OP
 #include "operators/kernel/activation_kernel.h"
 namespace paddle_mobile {
 namespace operators {
-using framework::DDim;
-using framework::Tensor;
 template <>
 bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data<float>();
   auto out = param->Out();
-  fpga::format_fp32_ofm(out);
-  auto float_input = new Tensor;
-  if (input->dims().size() == 2) {
-    float_input->mutable_data<float>({1, input->dims()[1]});
-  } else if (input->dims().size() == 4) {
-    float_input->mutable_data<float>(
-        {1, input->dims()[2], input->dims()[3], input->dims()[1]});
-  } else {
-    DLOG << "wrong dimension of softmax input";
-  }
-  fpga::format_fp32_ofm(float_input);
+  fpga::format_fp16_ofm(out);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
-  args.input_layout_type = fpga::LAYOUT_HWC;
-  args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
-  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.output_data_type = fpga::DATA_TYPE_FP16;
   args.image.address = input_ptr;
   args.image.height =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
   args.image.width =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
   args.image.channels = (uint32_t)input->dims()[1];
-  args.output.address = float_input->data<float>();
-  args.output.scale_address = float_input->scale;
-  param->SetFloatInput(float_input);
+  args.output.address = out->data<float>();
+  args.output.scale_address = out->scale;
+  args.output.activation.activation_type = activation_enable;
+  args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   param->SetFpgaArgs(args);
   return true;
 }
-template <typename T>
-T Sigmoid(const T a) {
-  T tmp = -1.0f * a;
-  return (1.0 / (1.0 + exp(tmp)));
-}
-template <typename T>
-void sigmoidFuntor(Tensor *input, Tensor *output) {
-  auto *input_ptr = input->data<T>();
-  auto *output_ptr = output->mutable_data<T>();
-  for (int i = 0; i < input->numel(); i++) {
-    *(output_ptr + i) = Sigmoid<T>(*(input_ptr + i));
-  }
-}
 template <>
 void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
-  Tensor *in_x = param.FloatInput();
-  Tensor *out = param.Out();
   fpga::PerformBypass(param.FpgaArgs());
-  fpga::fpga_invalidate((void *)in_x->data<float>(),  // NOLINT
-                        in_x->numel() * sizeof(float));
-  // TODO: In general case, 0 should be squeezed before softmax input  // NOLINT
-  sigmoidFuntor<float>(in_x, out);
-  fpga::fpga_flush(out->data<float>(), out->memory_size());
 }
 }  // namespace operators
 }  // namespace paddle_mobile
......
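The removed host-side fallback computed sigmoid(x) = 1 / (1 + exp(-x)) on the CPU after a bypass to fp32; the kernel now asks the bypass PE itself to apply SIGMOID to fp16 data. A host reference of the deleted functor, useful for validating the FPGA path (a sketch, not part of the patch):

    #include <cmath>

    // Host reference for checking the FPGA SIGMOID activation output.
    float sigmoid_ref(float x) { return 1.0f / (1.0f + std::exp(-x)); }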
@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   auto out = param->Out();
   fpga::format_fp32_ofm(out);
-
   auto float_input = new Tensor;
   if (input->dims().size() == 2) {
     float_input->mutable_data<float>({1, input->dims()[1]});
@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   } else {
     DLOG << "wrong dimension of softmax input";
   }
-
   fpga::format_fp32_ofm(float_input);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.output.scale_address = float_input->scale;
   param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
+
   return true;
 }
......
@@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  std::shared_ptr<RType> float_input_x_;
   fpga::BypassArgs fpga_bypass_args;

  public:
-  RType *FloatInput() const {
-    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
-  }
-  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
   const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
   void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
 #endif
@@ -1214,6 +1209,20 @@ class FetchParam : public OpParam {
  private:
   RType *input_x_;
   Tensor *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  RType *FloatInput() const {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 #ifdef FILL_CONSTANT_OP
......