Commit 06eead1c authored by zhaojiaying01

resolve the conflict

@@ -346,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
   auto filter_pad_width_mul_channel =
       args.image.pad_width * args.image.channels;
   auto image_amount_per_row_multi_win_first =
-      image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
+      image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
   auto image_amount_per_row_multi_win =
-      image_amount_per_row * (4 * args.kernel.stride_h);
+      image_amount_per_row * (2 * args.kernel.stride_h);
   auto image_block_num = block_num;
   auto image_block_len =
@@ -375,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
       (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
           ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
           : 0;
-  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  auto cmd = 0UL | USE_BIAS;
   auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
                       ((args.deconv_tx_param.sub_conv_num) << 16) |
@@ -413,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
 void expand_EW_arg(EWAddArgs *arg) {
   EWAddArgs args = *arg;
-  uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  // uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  uint64_t cmd = 0;
   uint64_t datalen = (uint64_t)args.image0.width *
                      (uint64_t)args.image0.height *
                      (uint64_t)args.image0.channels;
@@ -441,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float *bs_ptr) {
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
   auto out_ptr = out->data<float>();
@@ -488,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                                              filter->dims()[3]));
   for (int i = 0; i < n; i++) {
-    arg->conv_arg[i].relu_enabled = relu_enabled;
+    // arg->conv_arg[i].relu_enabled = relu_enabled;
+    arg->conv_arg[i].output.activation.activation_type = activation_enable;
+    arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->conv_arg[i].group_num = (uint32_t)group_num;
     arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
     arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
@@ -560,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w,
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
                      float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
@@ -687,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
     }
     for (int j = 0; j < split_num; ++j) {
-      arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
+          activation_enable;
+      arg->split_conv_args[i]
+          ->conv_arg[j]
+          .output.activation.leaky_relu_negative_slope =
+          leaky_relu_negative_slope;
       arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
       arg->split_conv_args[i]->conv_arg[j].kernel.width =
@@ -800,13 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float *bias_ptr) {
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
   arg->sub_conv_num = 1;
-  arg->relu_enabled = relu_enabled;
+  // arg->relu_enabled = relu_enabled;
+  arg->output.activation.activation_type = activation_enable;
+  arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   arg->bias_address = bias_ptr;
   arg->filter_address = filter_ptr;
   arg->kernel.height = (uint32_t)filter->dims()[2];
@@ -826,8 +844,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
 void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                        framework::Tensor *out, framework::Tensor *filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float *bias_ptr) {
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
@@ -884,7 +904,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
     arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
     arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
-    arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    // arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
+    arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->dw_conv_args[i]->bias_address = bias_ptr;
     arg->dw_conv_args[i]->filter_address =
...
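Note: the fill_* helpers above now take an explicit ActivationType plus a leaky-ReLU slope instead of a single relu_enabled flag, and they store both on the output descriptor of each sub-convolution. Below is a minimal sketch of a caller adapting to the new fill_split_arg signature; it assumes input, out, filter, bs_ptr and param are prepared exactly as in the existing kernels, and it mirrors how the relu-fused kernels later in this commit map the old flag (LEAKYRELU with a zero slope).

  // Sketch only; mirrors the kernel changes later in this commit.
  paddle_mobile::fpga::ActivationType activation_enable =
      paddle_mobile::fpga::LEAKYRELU;  // use fpga::NONE when nothing is fused
  int16_t leaky_relu_negative_slope = 0;  // slope 0 behaves like plain ReLU
  fpga::SplitConvArgs conv_arg = {0};
  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
                       leaky_relu_negative_slope, param->Groups(),
                       param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
  param->SetFpgaArgs(conv_arg);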
@@ -47,20 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
 void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                     framework::Tensor* out, framework::Tensor* filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float* bs_ptr);
 void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
+                     float* bs_ptr);
 void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float* bias_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float* bias_ptr);
 void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
                        framework::Tensor* out, framework::Tensor* filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float* bs_ptr);
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float* bs_ptr);
 void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
                           int group_num, int stride);
...
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "fpga/V1/filter.h"
 // #include "filter.h"
 #include "fpga/V1/api.h"
-// #include "fpga_api.h"
 namespace paddle_mobile {
 namespace fpga {
...
@@ -63,6 +63,7 @@ using namespace std;  // NOLINT
 #define REG_TIMER_COUNTER 0x070
 #define REG_SCALE_PARAMETER 0x080
+#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
 #define REG_FLASH_CMD 0x200
 #define REG_FLASH_DATA 0x208
@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
 int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "======Compute Basic Conv======";
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " sb_address:" << args.sb_address
+  // DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " sb_address:" << args.sb_address
        << " filter_address:" << args.filter_address
        << " filter_num:" << args.filter_num
        << " group_num:" << args.group_num;
@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
     ret = -EIO;
@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(
       ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   image_physical_address = vaddr_to_paddr_driver(args.image.address);
   output_physical_address = vaddr_to_paddr_driver(args.output.address);
   uint32_t output_height = (uint32_t)(
@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaEWAdd===========";
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " const0:" << fp16_2_fp32(int16_t(args.const0))
+  // DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
        << " const1:" << fp16_2_fp32(int16_t(args.const1));
   DLOG << " image0_address:" << args.image0.address
        << " image0_scale_address:" << args.image0.scale_address
@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
     ret = -EIO;
@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
   reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
   uint8_t data_cell_in = 0;
   uint8_t data_cell_out = 0;
   int ret = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
   datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
             (uint64_t)args.image.channels;
   datalen = align_to_x(datalen, 16);
@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
       (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
     return -EFAULT;
   }
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
     ret = -EIO;
@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
   reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
@@ -585,6 +666,7 @@ int PerformBypass(const struct BypassArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
@@ -808,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
 int ComputeDWConv(const struct DWconvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeDWConv===========";
-  DLOG << " mode:" << args.relu_enabled;
+  // DLOG << " mode:" << args.relu_enabled;
   DLOG << " image_address:" << args.image.address
        << " image_scale_address:" << args.image.scale_address
        << " image_channels:" << args.image.channels
@@ -831,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
   uint64_t output_scale = 0;
   uint64_t timer_cnt = 0;
   int ret = 0;
-  uint64_t cmd = args.relu_enabled;
+  // uint64_t cmd = args.relu_enabled;
+  uint64_t cmd = 0;
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
   uint64_t filter_physical_address = 0;
...
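Note: in ComputeBasicConv, ComputeFpgaPool, ComputeFpgaEWAdd and PerformBypass above, the activation configuration is packed into one 64-bit word and written to the new REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR register (offset 0x090): the activation type occupies the upper 32 bits, the 16-bit slope the low bits. A small sketch of that packing as a standalone helper follows; the helper name is ours, the commit inlines the expression at each call site and always passes a slope of 0.

  // Hypothetical helper illustrating the register layout used above.
  static inline uint64_t PackActivationReg(
      paddle_mobile::fpga::ActivationType type, int16_t leaky_relu_slope) {
    // bits 63..32: activation mode (NONE/LEAKYRELU/SIGMOID/TANH/SOFTMAX)
    // bits 15..0 : leaky-ReLU negative slope
    // The uint16_t cast avoids sign-extending a negative slope into the
    // mode bits; the commit itself always passes 0 here.
    return (static_cast<uint64_t>(type) << 32) |
           static_cast<uint16_t>(leaky_relu_slope);
  }
  // e.g. reg_writeq(PackActivationReg(active_args.activation_type,
  //                                   active_args.leaky_relu_negative_slope),
  //                 REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);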
@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
   unsigned int nr = (unsigned int)_nr;
   int ret = 0;
   uint64_t a_size = FPGA_PAGE_SIZE * nr;
-  DLOG << a_size;
   pthread_mutex_lock(&memory->mutex);
@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
 void fpga_copy_driver(void *dest, const void *src, size_t num) {
   uint64_t i;
-  DLOG << "dest:" << dest << " src:" << src << " size:" << num;
   for (i = 0; i < num; i++) {
     *((int8_t *)dest + i) = *((int8_t *)src + i);  // NOLINT
   }
...
@@ -29,7 +29,7 @@ namespace driver {
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
-#define FPGA_REG_PHY_ADDR 0xa0000000
+#define FPGA_REG_PHY_ADDR 0x80000000
 #define FPGA_REG_SIZE 0x1000
 #define FPGA_MEM_PHY_ADDR 0x40000000
 #define FPGA_MEM_SIZE 0x80000000
...
@@ -45,6 +45,7 @@ enum ActivationType {
   LEAKYRELU = 1,
   SIGMOID = 2,
   TANH = 3,
+  SOFTMAX = 4,
 };
 struct ActivationArgs {
@@ -132,7 +133,7 @@ struct DeconvTxParm {
 #endif
 struct ConvArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   void* sb_address;  // scale and bias
   void* filter_address;
   float* filter_scale_address;
@@ -198,7 +199,7 @@ struct PoolingArgs {
 };
 struct EWAddArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
   uint32_t const1;
   struct ImageInputArgs image0;
@@ -230,7 +231,7 @@ struct DeconvArgs {
 };
 struct DWconvArgs {
   uint32_t sub_conv_num;
-  bool relu_enabled;
+  // bool relu_enabled;
   void* bias_address;
   void* filter_address;
   struct KernelArgs kernel;
...
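Note: with relu_enabled commented out of ConvArgs, EWAddArgs and DWconvArgs, the fused activation now travels with the output descriptor instead. A minimal sketch of the types involved, as far as this diff shows them: only the two ActivationArgs fields touched by the commit are listed, and NONE = 0 is an assumption inferred from LEAKYRELU = 1.

  enum ActivationType {
    NONE = 0,  // assumed; the diff only shows LEAKYRELU onwards
    LEAKYRELU = 1,
    SIGMOID = 2,
    TANH = 3,
    SOFTMAX = 4,
  };
  struct ActivationArgs {
    enum ActivationType activation_type;
    int16_t leaky_relu_negative_slope;
  };
  // Each output descriptor carries an ActivationArgs, so callers write:
  //   conv_arg.output.activation.activation_type = LEAKYRELU;
  //   conv_arg.output.activation.leaky_relu_negative_slope = 0;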
@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
 #ifdef SIGMOID_OP
 DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
+#endif
 #endif  // SIGMOID_OP
 #ifdef TANH_OP
...
@@ -32,6 +32,7 @@ void ConvBNReluBasic(const FusionConvBNReluParam<CPU> &param) {
   Tensor new_scale = *param.NewScale();
   Tensor *output = param.Output();
+  output->mutable_data<float>();
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
...
@@ -32,6 +32,7 @@ void DWConvBNReluBasic(const FusionDWConvBNReluParam<CPU> &param) {
   Tensor new_scale = *param.NewScale();
   Tensor *output = param.Output();
+  output->mutable_data<float>();
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
...
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
...
@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
-                          strides[0], strides[1], paddings[0], paddings[1],
-                          new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, strides[0], strides[1],
+                          paddings[0], paddings[1], new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                         param->Groups(), strides[0], strides[1], paddings[0],
-                         paddings[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(), strides[0],
+                         strides[1], paddings[0], paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
   }
   return true;
...
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
...
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
...
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
...
@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
...
@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -53,17 +56,18 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
                             param->Strides()[0], param->Strides()[1],
                             param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
...
@@ -24,7 +24,10 @@ namespace operators {
 template <>
 bool DeconvAddReluKernel<FPGA, float>::Init(
     FusionDeconvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -54,17 +57,18 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
                             param->Strides()[0], param->Strides()[1],
                             param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
   return true;
...
@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
...
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
...
@@ -19,12 +19,34 @@ namespace operators {
 template <>
 bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  // fpga::format_fp16_ofm(output);
   return true;
 }
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   param.Out()->ShareDataWith(*(param.InputX()));
+  /*auto input =
+      reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
+  fpga::format_image(input);
+  auto input_ptr = input->data<float>();
+  Tensor *output = param.Out();
+  auto output_ptr = output->data<float>();
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.input_layout_type = fpga::LAYOUT_CHW;
+  args.output_layout_type = fpga::LAYOUT_HWC;
+  args.image.address = reinterpret_cast<void *>(input_ptr);
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] :
+  1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3]
+  : 1; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address
+  = output_ptr; args.output.scale_address = output->scale;
+  fpga::PerformBypass(args);*/
 }
 template class FetchKernel<FPGA, float>;
...
@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
...
@@ -22,6 +22,12 @@ namespace operators {
 template <>
 bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
   param->Out()->ShareDataWith(*param->InputX());
+  const int in_n = param->InputX()->dims()[0];
+  const int in_c = param->InputX()->dims()[1];
+  const int in_h = param->InputX()->dims()[2];
+  const int in_w = param->InputX()->dims()[3];
+  auto out = param->Out();
+  out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
   return true;
 }
...
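Note: ReshapeKernel<FPGA>::Init above now flattens the shared 4-D input into a 2-D output shape. A small worked sketch of the resulting dimensions, assuming an NCHW input as the indexing in the diff implies:

  // If InputX has dims {N, C, H, W}, e.g. {1, 24, 7, 7}, then after Init:
  //   Out shares InputX's buffer (ShareDataWith), and
  //   Out->dims() == {N, C * H * W}, here {1, 1176}.
  auto in_dims = param->InputX()->dims();
  param->Out()->Resize(framework::make_ddim(
      {in_dims[0], in_dims[1] * in_dims[2] * in_dims[3]}));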
@@ -15,73 +15,41 @@ limitations under the License. */
 #ifdef SIGMOID_OP
 #include "operators/kernel/activation_kernel.h"
 namespace paddle_mobile {
 namespace operators {
-using framework::DDim;
-using framework::Tensor;
 template <>
 bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data<float>();
   auto out = param->Out();
-  fpga::format_fp32_ofm(out);
+  fpga::format_fp16_ofm(out);
-  auto float_input = new Tensor;
-  if (input->dims().size() == 2) {
-    float_input->mutable_data<float>({1, input->dims()[1]});
-  } else if (input->dims().size() == 4) {
-    float_input->mutable_data<float>(
-        {1, input->dims()[2], input->dims()[3], input->dims()[1]});
-  } else {
-    DLOG << "wrong dimension of softmax input";
-  }
-  fpga::format_fp32_ofm(float_input);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
-  args.input_layout_type = fpga::LAYOUT_HWC;
-  args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
-  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.output_data_type = fpga::DATA_TYPE_FP16;
   args.image.address = input_ptr;
   args.image.height =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
   args.image.width =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
   args.image.channels = (uint32_t)input->dims()[1];
-  args.output.address = float_input->data<float>();
-  args.output.scale_address = float_input->scale;
-  param->SetFloatInput(float_input);
+  args.output.address = out->data<float>();
+  args.output.scale_address = out->scale;
+  args.output.activation.activation_type = activation_enable;
+  args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   param->SetFpgaArgs(args);
   return true;
 }
-template <typename T>
-T Sigmoid(const T a) {
-  T tmp = -1.0f * a;
-  return (1.0 / (1.0 + exp(tmp)));
-}
-template <typename T>
-void sigmoidFuntor(Tensor *input, Tensor *output) {
-  auto *input_ptr = input->data<T>();
-  auto *output_ptr = output->mutable_data<T>();
-  for (int i = 0; i < input->numel(); i++) {
-    *(output_ptr + i) = Sigmoid<T>(*(input_ptr + i));
-  }
-}
 template <>
 void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
-  Tensor *in_x = param.FloatInput();
-  Tensor *out = param.Out();
   fpga::PerformBypass(param.FpgaArgs());
-  fpga::fpga_invalidate((void *)in_x->data<float>(),  // NOLINT
-                        in_x->numel() * sizeof(float));
-  // TODO: In general case, 0 should be squeezed before softmax input // NOLINT
-  sigmoidFuntor<float>(in_x, out);
-  fpga::fpga_flush(out->data<float>(), out->memory_size());
 }
 }  // namespace operators
 }  // namespace paddle_mobile
...
@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   auto out = param->Out();
   fpga::format_fp32_ofm(out);
   auto float_input = new Tensor;
   if (input->dims().size() == 2) {
     float_input->mutable_data<float>({1, input->dims()[1]});
@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   } else {
     DLOG << "wrong dimension of softmax input";
   }
   fpga::format_fp32_ofm(float_input);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.output.scale_address = float_input->scale;
   param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
   return true;
 }
...
@@ -564,7 +564,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const framework::Tensor *input,
 #if __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
-  float *output_data = output->data<float>();
+  float *output_data = output->mutable_data<float>();
   const float *newscale_data = new_scale->data<float>();
   const float *newbias_data = new_bias->data<float>();
@@ -1309,7 +1309,7 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
 #if __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
-  float *output_data = output->data<float>();
+  float *output_data = output->mutable_data<float>();
   const float *bias_data;
   if (if_bias) {
     bias_data = bias->data<float>();
@@ -1729,7 +1729,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const framework::Tensor *input,
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
-  float *output_data = output->data<float>();
+  float *output_data = output->mutable_data<float>();
   const float *newscale_data = new_scale->data<float>();
   const float *newbias_data = new_bias->data<float>();
@@ -1978,6 +1978,7 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
   const int output_width = static_cast<int>(output->dims()[3]);
   const int inhxw = input_height * input_width;
   const int outhxw = output_height * output_width;
+  output->mutable_data<float>();
   float32x4_t zero = vdupq_n_f32(0.0);
   for (int b = 0; b < batch_size; b++) {
...
...@@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam { ...@@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args; fpga::BypassArgs fpga_bypass_args;
public: public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; } const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; } void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif #endif
...@@ -1214,6 +1209,20 @@ class FetchParam : public OpParam { ...@@ -1214,6 +1209,20 @@ class FetchParam : public OpParam {
private: private:
RType *input_x_; RType *input_x_;
Tensor *out_; Tensor *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
}; };
#ifdef FILL_CONSTANT_OP #ifdef FILL_CONSTANT_OP
......
...@@ -337,8 +337,8 @@ if (NOT FOUND_MATCH) ...@@ -337,8 +337,8 @@ if (NOT FOUND_MATCH)
target_link_libraries(test-genet paddle-mobile) target_link_libraries(test-genet paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) ADD_EXECUTABLE(test-sigmoid-op operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile) target_link_libraries(test-sigmoid-op paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
...@@ -408,14 +408,14 @@ if (NOT FOUND_MATCH) ...@@ -408,14 +408,14 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h)
target_link_libraries(test-ocr paddle-mobile) target_link_libraries(test-ocr paddle-mobile)
ADD_EXECUTABLE(test-sequence-expand operators/test_sequence_expand_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-sequence-expand-op operators/test_sequence_expand_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-expand paddle-mobile) target_link_libraries(test-sequence-expand-op paddle-mobile)
ADD_EXECUTABLE(test-sequence-pool operators/test_sequence_pool_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-sequence-pool-op operators/test_sequence_pool_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-pool paddle-mobile) target_link_libraries(test-sequence-pool-op paddle-mobile)
ADD_EXECUTABLE(test-sequence-softmax operators/test_sequence_softmax_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-sequence-softmax-op operators/test_sequence_softmax_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-softmax paddle-mobile) target_link_libraries(test-sequence-softmax-op paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-vgg16ssd net/test_vgg16ssd.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-vgg16ssd net/test_vgg16ssd.cpp test_helper.h test_include.h)
...@@ -445,4 +445,9 @@ if (NOT FOUND_MATCH) ...@@ -445,4 +445,9 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-is-empty-op operators/test_is_empty_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-is-empty-op operators/test_is_empty_op.cpp test_helper.h test_include.h)
target_link_libraries(test-is-empty-op paddle-mobile) target_link_libraries(test-is-empty-op paddle-mobile)
ADD_EXECUTABLE(test-conv-bn-relu-op operators/test_conv_bn_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-conv-bn-relu-op paddle-mobile)
ADD_EXECUTABLE(test-dwconv-bn-relu-op operators/test_dwconv_bn_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-dwconv-bn-relu-op paddle-mobile)
endif () endif ()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_conv_bn_relu_op.h"
namespace paddle_mobile {
// Reference convolution from Caffe for checking results.
// accumulate through explicit loops over input, output, and filters.
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestConvBnReluOp(int in_channels, int in_height, int in_width,
int out_channels, int groups, std::string opname) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
int pad_w = Pad;
int stride_h = Stride;
int stride_w = Stride;
int dilation_h = 1;
int dilation_w = 1;
int batch_size = 1;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w});
framework::DDim shape = framework::make_ddim({output_c});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Filter"] = std::vector<std::string>({"filter"});
outputs["Out"] = std::vector<std::string>({"output"});
inputs["Mean"] = std::vector<std::string>({"input_mean"});
inputs["Variance"] = std::vector<std::string>({"input_variance"});
inputs["Scale"] = std::vector<std::string>({"input_scale"});
inputs["Bias"] = std::vector<std::string>({"input_bias"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -20.0, 20.0);
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(filter, filter_shape, -20, 20);
auto input_mean_var = scope.get()->Var("input_mean");
auto input_mean = input_mean_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(input_mean, shape, -10.0, 10.0);
auto vari_var = scope.get()->Var("input_variance");
auto vari = vari_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(vari, shape, -10.0, 10.0);
auto scale_var = scope.get()->Var("input_scale");
auto scale = scale_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(scale, shape, -10.0, 10.0);
auto input_bias_var = scope.get()->Var("input_bias");
auto input_bias = input_bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(input_bias, shape, -10.0, 10.0);
auto output_var = scope.get()->Var("output");
framework::AttributeMap attrs;
attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
attrs["dilations"].Set<vector<int>>(
std::vector<int>({dilation_h, dilation_w}));
attrs["groups"].Set<int>(groups);
attrs["epsilon"].Set<float>(1e-6);
attrs["momentum"].Set<float>(0.f);
auto *op = new operators::FusionConvBNReluOp<CPU, float>(
"fusion_conv_bn_relu", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_conv.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
// kernel = 3, pad = 1, stride = 2
paddle_mobile::TestConvBnReluOp<float, float, 3, 1, 2>(3, 48, 48, 16, 1,
"conv_bn_relu");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(16, 24, 24, 8, 1,
"depthwise_seperable");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 24, 24, 8, 1,
"MBConv_3x3_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 24, 24, 8, 1,
"MBConv_3x3_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 12, 12, 16, 1,
"MBConv_3x3_pw3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 6, 6, 32, 1, "MBConv_5x5_stage1_pw3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 64, 1, "MBConv_5x5_stage2_pw3");
return 0;
}
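The comment at the top of TestConvBnReluOp refers to a reference convolution computed with explicit loops over input, output, and filters, although the test above only measures run time. A minimal sketch of such a naive NCHW reference (illustrative names and layout assumptions, not code from this commit) could look like:

#include <vector>

// Naive single-batch, single-group NCHW convolution used only as a
// correctness reference: visit every output element and accumulate
// input * filter over its receptive field, with zero padding.
void NaiveConv2d(const std::vector<float> &in, const std::vector<float> &filter,
                 std::vector<float> *out, int C, int H, int W, int OC, int K,
                 int pad, int stride) {
  int OH = (H + 2 * pad - K) / stride + 1;
  int OW = (W + 2 * pad - K) / stride + 1;
  out->assign(static_cast<size_t>(OC) * OH * OW, 0.f);
  for (int oc = 0; oc < OC; ++oc) {
    for (int oh = 0; oh < OH; ++oh) {
      for (int ow = 0; ow < OW; ++ow) {
        float acc = 0.f;
        for (int c = 0; c < C; ++c) {
          for (int kh = 0; kh < K; ++kh) {
            for (int kw = 0; kw < K; ++kw) {
              int ih = oh * stride - pad + kh;
              int iw = ow * stride - pad + kw;
              if (ih < 0 || ih >= H || iw < 0 || iw >= W) continue;  // zero padding
              acc += in[(c * H + ih) * W + iw] *
                     filter[((oc * C + c) * K + kh) * K + kw];
            }
          }
        }
        (*out)[(oc * OH + oh) * OW + ow] = acc;
      }
    }
  }
}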
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <iostream>
#include "../test_helper.h" #include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
#include "operators/conv_op.h" #include "operators/conv_op.h"
...@@ -209,10 +210,10 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels, ...@@ -209,10 +210,10 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels,
// PADDLE_MOBILE_ENFORCE(std::abs(gap / (output_data[i] + 1e-5)) < 1e-3, // PADDLE_MOBILE_ENFORCE(std::abs(gap / (output_data[i] + 1e-5)) < 1e-3,
// "output[%d] = %d, output_cmp[%d] = %d", i, // "output[%d] = %d, output_cmp[%d] = %d", i,
// output_data[i], i, output_cmp_data[i]); // output_data[i], i, output_cmp_data[i]);
if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { if (std::abs(gap) > 1e-2 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i] std::cerr << "output_data[" << i << "] = " << output_data[i]
<< ", output_cmp_data[" << i << ", output_cmp_data[" << i << "] = " << output_cmp_data[i]
<< "] = " << output_cmp_data[i]; << std::endl;
exit(1); exit(1);
} }
} }
...@@ -222,94 +223,131 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels, ...@@ -222,94 +223,131 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels,
} // namespace paddle_mobile } // namespace paddle_mobile
int main(int argc, char *argv[]) { int TestAll(const int in_channels, const int in_height, const int in_width,
if (argc < 5) { const int out_channels, const int groups) {
LOG(paddle_mobile::kLOG_INFO) std::cerr << "in_channels=" << in_channels << ", in_height=" << in_height
<< "Usage:\n" << ", in_width=" << in_width << ", out_channels=" << out_channels
<< " ./test-int8-conv-op in_channels in_height in_width out_channels " << ", groups=" << groups << std::endl;
"[groups]\n" // // kernel = 3, pad = 0, stride = 1
<< " params:\n" // std::cerr << "float, kernel=3, pad=0, stride=1" << std::endl;
<< " -in_channels: int, input image's channels\n" // paddle_mobile::TestConvOp<float, float, 3, 0, 1>(
<< " -in_height: int, input image's height\n" // in_channels, in_height, in_width, out_channels, groups);
<< " -in_width: int, input image's width\n" // // kernel = 3, pad = 1, stride = 1
<< " -out_channels: int, conv output channels\n"; // std::cerr << "float, kernel=3, pad=1, stride=1" << std::endl;
return 1; // paddle_mobile::TestConvOp<float, float, 3, 1, 1>(
} // in_channels, in_height, in_width, out_channels, groups);
int in_channels = atoi(argv[1]); // // kernel = 3, pad = 2, stride = 1
int in_height = atoi(argv[2]); // std::cerr << "float, kernel=3, pad=2, stride=1" << std::endl;
int in_width = atoi(argv[3]); // paddle_mobile::TestConvOp<float, float, 3, 2, 1>(
int out_channels = atoi(argv[4]); // in_channels, in_height, in_width, out_channels, groups);
int groups = 1; // // kernel = 3, pad = 5, stride = 1
if (argc == 6) { // std::cerr << "float, kernel=3, pad=5, stride=1" << std::endl;
groups = atoi(argv[5]); // paddle_mobile::TestConvOp<float, float, 3, 5, 1>(
} // in_channels, in_height, in_width, out_channels, groups);
//
// // kernel = 3, pad = 0, stride = 2
// std::cerr << "float, kernel=3, pad=0, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 0, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 1, stride = 2
// std::cerr << "float, kernel=3, pad=1, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 1, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 2, stride = 2
// std::cerr << "float, kernel=3, pad=2, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 2, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 5, stride = 2
// std::cerr << "float, kernel=3, pad=5, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 5, 2>(
// in_channels, in_height, in_width, out_channels, groups);
#ifndef __aarch64__
// kernel = 3, pad = 0, stride = 1 // kernel = 3, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=1"; std::cerr << "int8, kernel=3, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 1, stride = 1 // kernel = 3, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=1"; std::cerr << "int8, kernel=3, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 2, stride = 1 // kernel = 3, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=1"; std::cerr << "int8, kernel=3, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 5, stride = 1 // kernel = 3, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=1"; std::cerr << "int8, kernel=3, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 0, stride = 2 // kernel = 3, pad = 0, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=2"; std::cerr << "int8, kernel=3, pad=0, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 2>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 2>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 1, stride = 2 // kernel = 3, pad = 1, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=2"; std::cerr << "int8, kernel=3, pad=1, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 2>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 2>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 2, stride = 2 // kernel = 3, pad = 2, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=2"; std::cerr << "int8, kernel=3, pad=2, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 2>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 2>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 5, stride = 2 // kernel = 3, pad = 5, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=2"; std::cerr << "int8, kernel=3, pad=5, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 2>( paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 2>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
#endif // __aarch64__
// kernel = 5, pad = 0, stride = 1 // kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=0, stride=1"; std::cerr << "float, kernel=5, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 0, 1>( paddle_mobile::TestConvOp<float, float, 5, 0, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 1, stride = 1 // kernel = 5, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=1, stride=1"; std::cerr << "float, kernel=5, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 1, 1>( paddle_mobile::TestConvOp<float, float, 5, 1, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 2, stride = 1 // kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=2, stride=1"; std::cerr << "float, kernel=5, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 2, 1>( paddle_mobile::TestConvOp<float, float, 5, 2, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 5, stride = 1 // kernel = 5, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=5, stride=1"; std::cerr << "float, kernel=5, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 5, 1>( paddle_mobile::TestConvOp<float, float, 5, 5, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
#ifndef __aarch64__
// kernel = 5, pad = 0, stride = 1 // kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1"; std::cerr << "int8, kernel=5, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 0, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 5, 0, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 1, stride = 1 // kernel = 5, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=1, stride=1"; std::cerr << "int8, kernel=5, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 1, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 5, 1, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 2, stride = 1 // kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1"; std::cerr << "int8, kernel=5, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 2, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 5, 2, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 5, stride = 1 // kernel = 5, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=5, stride=1"; std::cerr << "int8, kernel=5, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 5, 1>( paddle_mobile::TestConvOp<int8_t, int32_t, 5, 5, 1>(
in_channels, in_height, in_width, out_channels, groups); in_channels, in_height, in_width, out_channels, groups);
#endif // __aarch64__
return 0;
}
int main() {
TestAll(1, 5, 5, 1, 1);
TestAll(1, 5, 5, 10, 1);
TestAll(10, 5, 5, 10, 10);
TestAll(5, 33, 33, 5, 1);
TestAll(5, 33, 33, 13, 1);
TestAll(13, 33, 33, 13, 13);
TestAll(5, 33, 13, 5, 1);
TestAll(5, 33, 13, 13, 1);
TestAll(13, 33, 13, 13, 13);
return 0; return 0;
} }
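The comparison in TestConvOp above was relaxed so that a mismatch must exceed both an absolute gap of 1e-2 and a relative error of 1e-3 before the test fails. A small self-contained helper expressing that combined check (hypothetical, shown only to make the tolerance logic explicit) might be:

#include <cmath>

// Treat two values as equal when either the absolute difference is small or
// the difference is small relative to the produced value; the thresholds
// mirror the ones used in the test above.
inline bool AlmostEqual(float actual, float expected, float abs_tol = 1e-2f,
                        float rel_tol = 1e-3f) {
  float gap = std::abs(actual - expected);
  if (gap <= abs_tol) return true;                       // small absolute gap
  return gap / (std::abs(actual) + 1e-5f) <= rel_tol;    // small relative gap
}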
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_dwconv_bn_relu_op.h"
namespace paddle_mobile {
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestDWConvAddBnReluOp(int in_channels, int in_height, int in_width,
int out_channels, int groups, std::string opname) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
int pad_w = Pad;
int stride_h = Stride;
int stride_w = Stride;
int dilation_h = 1;
int dilation_w = 1;
int batch_size = 1;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w});
framework::DDim shape = framework::make_ddim({output_c});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Filter"] = std::vector<std::string>({"filter"});
inputs["Mean"] = std::vector<std::string>({"mean"});
inputs["Variance"] = std::vector<std::string>({"variance"});
inputs["Scale"] = std::vector<std::string>({"scale"});
inputs["Bias"] = std::vector<std::string>({"bias"});
outputs["Out"] = std::vector<std::string>({"output"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -20.0, 20.0);
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(filter, filter_shape, -20, 20);
auto mean_var = scope.get()->Var("mean");
auto mean = mean_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(mean, shape, -10.0, 10.0);
auto vari_var = scope.get()->Var("variance");
auto vari = vari_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(vari, shape, -10.0, 10.0);
auto scale_var = scope.get()->Var("scale");
auto scale = scale_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(scale, shape, -10.0, 10.0);
auto bias_var = scope.get()->Var("bias");
auto bias = bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(bias, shape, -10.0, 10.0);
auto output_var = scope.get()->Var("output");
framework::AttributeMap attrs;
attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
attrs["dilations"].Set<vector<int>>(
std::vector<int>({dilation_h, dilation_w}));
attrs["groups"].Set<int>(groups);
attrs["epsilon"].Set<float>(1e-6);
attrs["momentum"].Set<float>(0.f);
auto *op = new operators::FusionDWConvBNReluOp<CPU, float>(
"fusion_dwconv_bn_relu", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_dwconv.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
16, 24, 24, 16, 16, "depthwise_separable");
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
24, 24, 24, 24, 24, "MBConv_3x3_dw1");
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
24, 24, 24, 24, 24, "MBConv_3x3_dw2");
// kernel = 3, pad = 1, stride = 2
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 2>(
24, 24, 24, 24, 24, "MBConv_3x3_dw3");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw1");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw2");
// kernel = 5, pad = 2, stride = 2
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 2>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw3");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw1");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw2");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw3");
return 0;
}
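Every depthwise case above passes groups equal to both the input and output channel count, so each filter sees exactly one input channel. A short sketch of the resulting filter shape (hypothetical helper, not part of this commit):

#include <cassert>
#include <vector>

// For a depthwise convolution, groups == in_channels == out_channels, so the
// NCHW filter shape {out_c, in_c / groups, k, k} collapses to {C, 1, k, k}.
std::vector<int> DepthwiseFilterShape(int channels, int kernel) {
  int groups = channels;
  assert(channels % groups == 0);
  return {channels, channels / groups, kernel, kernel};  // == {C, 1, k, k}
}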
...@@ -12,18 +12,89 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,18 +12,89 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
#include "operators/gru_op.h" #include "operators/gru_op.h"
int main() { namespace paddle_mobile {
paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_nlp);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::CPU, template <typename Itype, typename Otype>
paddle_mobile::operators::GruOp<paddle_mobile::CPU, float>> int TestGruOp(int in_channels, int out_channels, std::string opname) {
executor(program, "gru"); int input_c = in_channels;
int output_c = out_channels;
paddle_mobile::framework::LoD lod{{0, input_c}};
int batch_size = lod.size();
framework::DDim input_shape = framework::make_ddim({input_c, output_c * 3});
framework::DDim weight_shape = framework::make_ddim({output_c, output_c * 3});
framework::DDim h0_shape = framework::make_ddim({batch_size, output_c});
framework::DDim bias_shape = framework::make_ddim({batch_size, output_c * 3});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Weight"] = std::vector<std::string>({"weight"});
inputs["H0"] = std::vector<std::string>({"h0"});
inputs["Bias"] = std::vector<std::string>({"bias"});
outputs["BatchGate"] = std::vector<std::string>({"output_batch_gate"});
outputs["BatchResetHiddenPrev"] =
std::vector<std::string>({"output_batch_reset_hidden_prev"});
outputs["BatchHidden"] = std::vector<std::string>({"output_batch_hidden"});
outputs["Hidden"] = std::vector<std::string>({"output_hidden"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -127, 127);
input->set_lod(lod);
auto weight_var = scope.get()->Var("weight");
auto weight = weight_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(weight, weight_shape, -127, 127);
auto h0_var = scope.get()->Var("h0");
auto h0 = h0_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(h0, h0_shape, -127, 127);
auto bias_var = scope.get()->Var("bias");
auto bias = bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(bias, bias_shape, -127, 127);
auto batch_gate_var = scope.get()->Var("output_batch_gate");
auto batch_reset_hidden_prev_var =
scope.get()->Var("output_batch_reset_hidden_prev");
auto batch_hidden_var = scope.get()->Var("output_batch_hidden");
auto hidden_var = scope.get()->Var("output_hidden");
framework::AttributeMap attrs;
attrs["activation"].SetString(std::string("relu"));
attrs["gate_activation"].SetString(std::string("sigmoid"));
attrs["is_reverse"].Set<bool>(false);
auto *op =
new operators::GruOp<CPU, float>("gru", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_gru.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
paddle_mobile::TestGruOp<float, float>(384, 120, "gru_forward");
return 0; return 0;
} }
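TestGruOp builds an input of shape {input_c, output_c * 3} and a weight of shape {output_c, output_c * 3}, i.e. the update, reset, and candidate gates are packed along the last dimension, with "sigmoid" as the gate activation and "relu" as the candidate activation. The sketch below shows one common GRU step consistent with those shapes; it is an illustrative assumption, not the kernel paddle-mobile actually runs.

#include <cmath>
#include <vector>

static float SigmoidGate(float v) { return 1.f / (1.f + std::exp(-v)); }
static float ReluAct(float v) { return v > 0.f ? v : 0.f; }

// x: projected input of size 3*D laid out as [update | reset | candidate];
// h_prev: previous hidden state of size D; w: D x 3*D weight, row-major over
// h_prev, packed the same way as x; h: output hidden state of size D.
void GruStep(const std::vector<float> &x, const std::vector<float> &h_prev,
             const std::vector<float> &w, int D, std::vector<float> *h) {
  std::vector<float> u(D), r(D);
  h->assign(D, 0.f);
  for (int j = 0; j < D; ++j) {
    float gu = x[j], gr = x[D + j];
    for (int i = 0; i < D; ++i) {
      gu += h_prev[i] * w[i * 3 * D + j];       // update-gate columns
      gr += h_prev[i] * w[i * 3 * D + D + j];   // reset-gate columns
    }
    u[j] = SigmoidGate(gu);
    r[j] = SigmoidGate(gr);
  }
  for (int j = 0; j < D; ++j) {
    float gc = x[2 * D + j];
    for (int i = 0; i < D; ++i) {
      gc += r[i] * h_prev[i] * w[i * 3 * D + 2 * D + j];  // candidate columns
    }
    float c = ReluAct(gc);
    (*h)[j] = u[j] * h_prev[j] + (1.f - u[j]) * c;
  }
}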
...@@ -76,6 +76,5 @@ int main() { ...@@ -76,6 +76,5 @@ int main() {
paddle_mobile::TestLogOp({1, 1, 2, 3}); paddle_mobile::TestLogOp({1, 1, 2, 3});
paddle_mobile::TestLogOp({1, 3, 11, 22}); paddle_mobile::TestLogOp({1, 3, 11, 22});
paddle_mobile::TestLogOp({1, 32, 112, 112}); paddle_mobile::TestLogOp({1, 32, 112, 112});
std::cout << "test log op pass." << std::endl;
return 0; return 0;
} }
...@@ -92,18 +92,10 @@ static float find_abs_max(const Tensor *input) { ...@@ -92,18 +92,10 @@ static float find_abs_max(const Tensor *input) {
return max_abs; return max_abs;
} }
int TestQuqntizeOp(int argc, char *argv[]) { int TestQuqntizeOp(const int batch_size, const int channel, const int height,
if (argc < 5) { const int width) {
std::cout << "Usage: ./test-quantize-op batch_size channel height width" DLOG << "batch_size: " << batch_size << ", channel: " << channel
<< std::endl; << ", height: " << height << ", width: " << width;
return 1;
}
int batch_size = atoi(argv[1]);
int channel = atoi(argv[2]);
int height = atoi(argv[3]);
int width = atoi(argv[4]);
std::cout << "batch_size: " << batch_size << ", channel: " << channel
<< ", height: " << height << ", width: " << width << std::endl;
framework::DDim dim = framework::DDim dim =
framework::make_ddim({batch_size, channel, height, width}); framework::make_ddim({batch_size, channel, height, width});
...@@ -140,9 +132,7 @@ int TestQuqntizeOp(int argc, char *argv[]) { ...@@ -140,9 +132,7 @@ int TestQuqntizeOp(int argc, char *argv[]) {
framework::Tensor output_cmp; framework::Tensor output_cmp;
output_cmp.Resize(output->dims()); output_cmp.Resize(output->dims());
float scale = 127 / output_scale_cmp; float scale = 127 / output_scale_cmp;
// quantize<round::RoundToEven>(input, scale, &output_cmp); quantize<round::RoundAwayZero>(input, scale, &output_cmp);
// quantize<round::RoundAwayZero>(input, scale, &output_cmp);
quantize<round::RoundTowardsZero>(input, scale, &output_cmp);
int8_t *output_cmp_data = output_cmp.data<int8_t>(); int8_t *output_cmp_data = output_cmp.data<int8_t>();
for (int i = 0; i < output->numel(); ++i) { for (int i = 0; i < output->numel(); ++i) {
PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i], PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i],
...@@ -157,5 +147,7 @@ int TestQuqntizeOp(int argc, char *argv[]) { ...@@ -157,5 +147,7 @@ int TestQuqntizeOp(int argc, char *argv[]) {
} // namespace paddle_mobile } // namespace paddle_mobile
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
return paddle_mobile::TestQuqntizeOp(argc, argv); TestQuqntizeOp(1, 10, 10, 5);
TestQuqntizeOp(1, 111, 111, 5);
TestQuqntizeOp(5, 111, 111, 5);
} }
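The quantize test above now compares against round::RoundAwayZero instead of round::RoundTowardsZero. As a quick standalone illustration of the difference between the two rounding modes (plain C++, not the paddle-mobile round helpers):

#include <cmath>

// Round half away from zero: 2.5 -> 3, -2.5 -> -3 (what std::round does).
inline int RoundAwayFromZero(float x) { return static_cast<int>(std::round(x)); }

// Round towards zero: 2.9 -> 2, -2.9 -> -2 (plain truncation).
inline int RoundTowardsZero(float x) { return static_cast<int>(std::trunc(x)); }

For any given value the two modes differ by at most one quantized step, which is why the test's exact equality check only passes when the reference uses the same mode as the kernel.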
...@@ -59,7 +59,7 @@ int TestSequencePoolOp(const framework::LoDTensor &input_x, ...@@ -59,7 +59,7 @@ int TestSequencePoolOp(const framework::LoDTensor &input_x,
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
framework::LoDTensor input_x, output; framework::LoDTensor input_x, output;
// case 1 // case 1
std::cerr << "running max case 1" << std::endl; DLOG << "running max case 1";
{ {
std::vector<float> data{1, 2, 3, 4}; std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1})); input_x.Resize(framework::make_ddim({4, 1}));
...@@ -71,14 +71,14 @@ int main(int argc, char *argv[]) { ...@@ -71,14 +71,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{2, 4}; std::vector<float> expect_data{2, 4};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 2 // case 2
std::cerr << "running max case 2" << std::endl; DLOG << "running max case 2";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1})); input_x.Resize(framework::make_ddim({data.size(), 1}));
...@@ -90,13 +90,13 @@ int main(int argc, char *argv[]) { ...@@ -90,13 +90,13 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 10}; std::vector<float> expect_data{3, 10};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
std::cerr << "running max case 3" << std::endl; DLOG << "running max case 3";
// case 3 // case 3
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
...@@ -109,14 +109,14 @@ int main(int argc, char *argv[]) { ...@@ -109,14 +109,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 4, 7, 8}; std::vector<float> expect_data{3, 4, 7, 8};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 4 // case 4
std::cerr << "running max case 4" << std::endl; DLOG << "running max case 4";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
...@@ -129,14 +129,14 @@ int main(int argc, char *argv[]) { ...@@ -129,14 +129,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{6, 7, 8, 9, 10, 16, 17, 18, 19, 20}; std::vector<float> expect_data{6, 7, 8, 9, 10, 16, 17, 18, 19, 20};
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 1 // case 1
std::cerr << "running sum case 1" << std::endl; DLOG << "running sum case 1";
{ {
std::vector<float> data{1, 2, 3, 4}; std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1})); input_x.Resize(framework::make_ddim({4, 1}));
...@@ -148,14 +148,14 @@ int main(int argc, char *argv[]) { ...@@ -148,14 +148,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 7}; std::vector<float> expect_data{3, 7};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 2 // case 2
std::cerr << "running sum case 2" << std::endl; DLOG << "running sum case 2";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1})); input_x.Resize(framework::make_ddim({data.size(), 1}));
...@@ -167,14 +167,14 @@ int main(int argc, char *argv[]) { ...@@ -167,14 +167,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{6, 49}; std::vector<float> expect_data{6, 49};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 3 // case 3
std::cerr << "running sum case 3" << std::endl; DLOG << "running sum case 3";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
input_x.Resize(framework::make_ddim({4, 2})); input_x.Resize(framework::make_ddim({4, 2}));
...@@ -186,14 +186,14 @@ int main(int argc, char *argv[]) { ...@@ -186,14 +186,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{4, 6, 12, 14}; std::vector<float> expect_data{4, 6, 12, 14};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 4 // case 4
std::cerr << "running sum case 4" << std::endl; DLOG << "running sum case 4";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
...@@ -206,14 +206,14 @@ int main(int argc, char *argv[]) { ...@@ -206,14 +206,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{7, 9, 11, 13, 15, 27, 29, 31, 33, 35}; std::vector<float> expect_data{7, 9, 11, 13, 15, 27, 29, 31, 33, 35};
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 1 // case 1
std::cerr << "running first case 1" << std::endl; DLOG << "running first case 1";
{ {
std::vector<float> data{1, 2, 3, 4}; std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1})); input_x.Resize(framework::make_ddim({4, 1}));
...@@ -225,14 +225,14 @@ int main(int argc, char *argv[]) { ...@@ -225,14 +225,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 3}; std::vector<float> expect_data{1, 3};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 2 // case 2
std::cerr << "running first case 2" << std::endl; DLOG << "running first case 2";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1})); input_x.Resize(framework::make_ddim({data.size(), 1}));
...@@ -244,14 +244,14 @@ int main(int argc, char *argv[]) { ...@@ -244,14 +244,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 4}; std::vector<float> expect_data{1, 4};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 3 // case 3
std::cerr << "running first case 3" << std::endl; DLOG << "running first case 3";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8}; std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
input_x.Resize(framework::make_ddim({4, 2})); input_x.Resize(framework::make_ddim({4, 2}));
...@@ -263,14 +263,14 @@ int main(int argc, char *argv[]) { ...@@ -263,14 +263,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 2, 5, 6}; std::vector<float> expect_data{1, 2, 5, 6};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
} }
// case 4 // case 4
std::cerr << "running first case 4" << std::endl; DLOG << "running first case 4";
{ {
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
...@@ -283,8 +283,8 @@ int main(int argc, char *argv[]) { ...@@ -283,8 +283,8 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 2, 3, 4, 5, 11, 12, 13, 14, 15}; std::vector<float> expect_data{1, 2, 3, 4, 5, 11, 12, 13, 14, 15};
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) { if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i] DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl; << " != expect[" << i << "]: " << expect_data[i];
return 1; return 1;
} }
} }
......
...@@ -76,6 +76,5 @@ int main() { ...@@ -76,6 +76,5 @@ int main() {
paddle_mobile::TestSigmoidOp({1, 1, 2, 3}); paddle_mobile::TestSigmoidOp({1, 1, 2, 3});
paddle_mobile::TestSigmoidOp({1, 3, 11, 22}); paddle_mobile::TestSigmoidOp({1, 3, 11, 22});
paddle_mobile::TestSigmoidOp({1, 32, 112, 112}); paddle_mobile::TestSigmoidOp({1, 32, 112, 112});
std::cout << "test sigmoid op pass." << std::endl;
return 0; return 0;
} }
...@@ -58,7 +58,7 @@ int TestTanhOp(const std::vector<int> input_shape) { ...@@ -58,7 +58,7 @@ int TestTanhOp(const std::vector<int> input_shape) {
const float *output_data = output->data<float>(); const float *output_data = output->data<float>();
for (int i = 0; i < output->numel(); ++i) { for (int i = 0; i < output->numel(); ++i) {
float gap = output_data[i] - output_cmp_data[i]; float gap = output_data[i] - output_cmp_data[i];
if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { if (std::abs(gap) > 1e-5 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i] LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
<< ", output_cmp_data[" << i << ", output_cmp_data[" << i
<< "] = " << output_cmp_data[i]; << "] = " << output_cmp_data[i];
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
set -e set -e
source ./ci_run_test.sh
function print_usage() { function print_usage() {
echo "\n${RED}Usage${NONE}: echo "\n${RED}Usage${NONE}:
...@@ -231,6 +232,11 @@ function build_linux_fpga() { ...@@ -231,6 +232,11 @@ function build_linux_fpga() {
docker build -t paddle-mobile:dev - < Dockerfile docker build -t paddle-mobile:dev - < Dockerfile
fi fi
docker run --rm -v `pwd`:/workspace paddle-mobile:dev bash /workspace/tools/docker_build_fpga.sh docker run --rm -v `pwd`:/workspace paddle-mobile:dev bash /workspace/tools/docker_build_fpga.sh
cd -
}
function run_android_test() {
ExecuteAndroidTests $1
} }
function main() { function main() {
...@@ -239,9 +245,11 @@ function main() { ...@@ -239,9 +245,11 @@ function main() {
case $CMD in case $CMD in
android_armv7) android_armv7)
build_android_armv7 build_android_armv7
run_android_test armeabi-v7a
;; ;;
android_armv8) android_armv8)
build_android_armv8 build_android_armv8
run_android_test arm64-v8a
;; ;;
ios) ios)
build_ios build_ios
......
#!/usr/bin/env bash
operators=
function AddTest() {
operators="${operators} $1"
}
function ExecuteAndroidTests() {
platform=$1
devices=`adb devices | grep -v devices | grep device | awk -F ' ' '{print $1}'`
for device in ${devices}; do
adb -s ${device} shell rm -rf /data/local/tmp/*
adb -s ${device} push ../build/${platform}/build/libpaddle-mobile.so /data/local/tmp/
for op in ${operators}; do
adb -s ${device} push ../test/build/test-${op}-op /data/local/tmp/
adb -s ${device} shell "cd /data/local/tmp/; LD_LIBRARY_PATH=. ./test-${op}-op"
echo "${BLUE}run test ${op} pass${NONE}"
done
done
}
AddTest batchnorm
AddTest cast
AddTest conv
AddTest dequantize
#AddTest elementwiseadd
AddTest log
AddTest logical-and
AddTest logical-not
AddTest logical-or
AddTest logical-xor
AddTest pool
AddTest quantize
AddTest relu
AddTest relu6
AddTest sequence-expand
AddTest sequence-pool
AddTest sequence-softmax
AddTest sigmoid
AddTest softmax
AddTest tanh
AddTest topk
#!/usr/bin/env bash
apt-get update apt-get update
apt-get install -y gcc g++ cmake apt-get install -y gcc g++ cmake
......