diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index a8540e72f3ad077ac4aa49e34b535675f04dcd16..9a408a8f2fbe3c600679ddb2e3eadb493f323165 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -346,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) { auto filter_pad_width_mul_channel = args.image.pad_width * args.image.channels; auto image_amount_per_row_multi_win_first = - image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height); + image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height); auto image_amount_per_row_multi_win = - image_amount_per_row * (4 * args.kernel.stride_h); + image_amount_per_row * (2 * args.kernel.stride_h); auto image_block_num = block_num; auto image_block_len = @@ -375,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) { (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2) ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2) : 0; - auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS; + // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS; + auto cmd = 0UL | USE_BIAS; auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) | ((args.deconv_tx_param.sub_conv_num) << 16) | @@ -413,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) { void expand_EW_arg(EWAddArgs *arg) { EWAddArgs args = *arg; - uint64_t cmd = args.relu_enabled ? USE_RELU : 0; + // uint64_t cmd = args.relu_enabled ? USE_RELU : 0; + uint64_t cmd = 0; uint64_t datalen = (uint64_t)args.image0.width * (uint64_t)args.image0.height * (uint64_t)args.image0.channels; @@ -441,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) { void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float *bs_ptr) { + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); auto out_ptr = out->data(); @@ -488,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, filter->dims()[3])); for (int i = 0; i < n; i++) { - arg->conv_arg[i].relu_enabled = relu_enabled; + // arg->conv_arg[i].relu_enabled = relu_enabled; + arg->conv_arg[i].output.activation.activation_type = activation_enable; + arg->conv_arg[i].output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; arg->conv_arg[i].group_num = (uint32_t)group_num; arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h; arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w; @@ -560,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); @@ -687,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, } for (int j = 0; j < split_num; ++j) { - arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; + // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; + arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type = + activation_enable; + arg->split_conv_args[i] + ->conv_arg[j] + .output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num; arg->split_conv_args[i]->conv_arg[j].kernel.width = @@ -800,13 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float *bias_ptr) { + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float *bias_ptr) { auto filter_ptr = filter->data(); auto input_ptr = input->data(); auto output_ptr = out->mutable_data(); arg->sub_conv_num = 1; - arg->relu_enabled = relu_enabled; + // arg->relu_enabled = relu_enabled; + arg->output.activation.activation_type = activation_enable; + arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope; arg->bias_address = bias_ptr; arg->filter_address = filter_ptr; arg->kernel.height = (uint32_t)filter->dims()[2]; @@ -826,8 +844,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float *bias_ptr) { + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float *bias_ptr) { auto filter_ptr = filter->data(); auto input_ptr = input->data(); auto output_ptr = out->mutable_data(); @@ -884,7 +904,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input, arg->dw_conv_args.push_back(std::make_shared()); arg->dw_conv_args[i]->sub_conv_num = sub_conv_num; - arg->dw_conv_args[i]->relu_enabled = relu_enabled; + // arg->dw_conv_args[i]->relu_enabled = relu_enabled; + arg->dw_conv_args[i]->output.activation.activation_type = activation_enable; + arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; arg->dw_conv_args[i]->bias_address = bias_ptr; arg->dw_conv_args[i]->filter_address = diff --git a/src/fpga/V1/api.h b/src/fpga/V1/api.h index 05d6a938c85f14770b97cd477580d0e6103fa777..05a30ddce4828bf8ac0f049ea0db4f18dc1dba79 100644 --- a/src/fpga/V1/api.h +++ b/src/fpga/V1/api.h @@ -47,20 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width, void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float* bs_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float* bs_ptr); void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float* bs_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float* bs_ptr); void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float* bias_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float* bias_ptr); void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float* bs_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float* bs_ptr); void format_deconv_filter(framework::Tensor* filter_tensor, float max_value, int group_num, int stride); diff --git a/src/fpga/V1/deconv_filter.cpp b/src/fpga/V1/deconv_filter.cpp index 4c484a45d0a36db4aac677377ae11b5235603ac6..7c87452f5a7264ad069d8508cb1e9dc24f5cdc3d 100644 --- a/src/fpga/V1/deconv_filter.cpp +++ b/src/fpga/V1/deconv_filter.cpp @@ -19,7 +19,6 @@ limitations under the License. */ #include "fpga/V1/filter.h" // #include "filter.h" #include "fpga/V1/api.h" -// #include "fpga_api.h" namespace paddle_mobile { namespace fpga { diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp index 16d3bc793389f49ad0b6e3bf3b064a880e4a927a..5a81e2422979f08b2113bd9b46022fe4d77154cb 100644 --- a/src/fpga/V1/pe.cpp +++ b/src/fpga/V1/pe.cpp @@ -63,6 +63,7 @@ using namespace std; // NOLINT #define REG_TIMER_COUNTER 0x070 #define REG_SCALE_PARAMETER 0x080 +#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090 #define REG_FLASH_CMD 0x200 #define REG_FLASH_DATA 0x208 @@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) { int ComputeBasicConv(const struct ConvArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "======Compute Basic Conv======"; - DLOG << " relu_enabled:" << args.relu_enabled - << " sb_address:" << args.sb_address + // DLOG << " relu_enabled:" << args.relu_enabled + DLOG << " sb_address:" << args.sb_address << " filter_address:" << args.filter_address << " filter_num:" << args.filter_num << " group_num:" << args.group_num; @@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) { #ifdef PADDLE_MOBILE_ZU5 int ret = 0; uint64_t output_scale = 0; + + uint64_t reg_ActivationArgs = 0; + // active function:{none,leakeyrelu,sigmoid,tanh} + ActivationArgs active_args; + // active_args.activation_type = LEAKYRELU; + + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) { ret = -EIO; @@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) { pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; } + + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq( ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32), @@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) { output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; @@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { uint64_t image_physical_address = 0; uint64_t output_physical_address = 0; + uint64_t reg_ActivationArgs = 0; + // active function:{none,leakeyrelu,sigmoid,tanh} + ActivationArgs active_args; + // active_args.activation_type = LEAKYRELU; + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + image_physical_address = vaddr_to_paddr_driver(args.image.address); output_physical_address = vaddr_to_paddr_driver(args.output.address); uint32_t output_height = (uint32_t)( @@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { return ret; } + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR); reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR); @@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; @@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "=============ComputeFpgaEWAdd==========="; - DLOG << " relu_enabled:" << args.relu_enabled - << " const0:" << fp16_2_fp32(int16_t(args.const0)) + // DLOG << " relu_enabled:" << args.relu_enabled + DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0)) << " const1:" << fp16_2_fp32(int16_t(args.const1)); DLOG << " image0_address:" << args.image0.address << " image0_scale_address:" << args.image0.scale_address @@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef PADDLE_MOBILE_ZU5 int ret = 0; uint64_t output_scale = 0; + + uint64_t reg_ActivationArgs = 0; + ActivationArgs active_args; + active_args.activation_type = args.output.activation.activation_type; + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) { ret = -EIO; @@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { return ret; } + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR); reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR); @@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; #endif @@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) { uint8_t data_cell_in = 0; uint8_t data_cell_out = 0; int ret = 0; + + uint64_t reg_ActivationArgs = 0; + ActivationArgs active_args; + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + datalen = (uint64_t)args.image.width * (uint64_t)args.image.height * (uint64_t)args.image.channels; datalen = align_to_x(datalen, 16); @@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) { (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) { return -EFAULT; } - pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) { ret = -EIO; @@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) { pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; } - + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR); reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR); @@ -585,6 +666,7 @@ int PerformBypass(const struct BypassArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; #endif @@ -808,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) { int ComputeDWConv(const struct DWconvArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "=============ComputeDWConv==========="; - DLOG << " mode:" << args.relu_enabled; + // DLOG << " mode:" << args.relu_enabled; DLOG << " image_address:" << args.image.address << " image_scale_address:" << args.image.scale_address << " image_channels:" << args.image.channels @@ -831,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) { uint64_t output_scale = 0; uint64_t timer_cnt = 0; int ret = 0; - uint64_t cmd = args.relu_enabled; + // uint64_t cmd = args.relu_enabled; + uint64_t cmd = 0; uint64_t image_physical_address = 0; uint64_t output_physical_address = 0; uint64_t filter_physical_address = 0; diff --git a/src/fpga/common/driver.cpp b/src/fpga/common/driver.cpp index 18a310b09cad4a741eb83453a09f3c94d4f0db05..b1d3559dbbb238ae24cc6224e2d253dab744dce1 100644 --- a/src/fpga/common/driver.cpp +++ b/src/fpga/common/driver.cpp @@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) { unsigned int nr = (unsigned int)_nr; int ret = 0; uint64_t a_size = FPGA_PAGE_SIZE * nr; - DLOG << a_size; pthread_mutex_lock(&memory->mutex); @@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) { void fpga_copy_driver(void *dest, const void *src, size_t num) { uint64_t i; - - DLOG << "dest:" << dest << " src:" << src << " size:" << num; - for (i = 0; i < num; i++) { *((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT } diff --git a/src/fpga/common/driver.h b/src/fpga/common/driver.h index 4fa83b776e7b3df5df5e536de91093fd18ca67a1..d35627cd46b3f233255a98d1e1fbca27469f715c 100644 --- a/src/fpga/common/driver.h +++ b/src/fpga/common/driver.h @@ -29,7 +29,7 @@ namespace driver { #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -#define FPGA_REG_PHY_ADDR 0xa0000000 +#define FPGA_REG_PHY_ADDR 0x80000000 #define FPGA_REG_SIZE 0x1000 #define FPGA_MEM_PHY_ADDR 0x40000000 #define FPGA_MEM_SIZE 0x80000000 diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h index 25ca99613e91dcbab4ffedf3802f2025afdc040f..60753e5cde1e39a1dbf4a1016667db748fc6b9f9 100644 --- a/src/fpga/common/fpga_common.h +++ b/src/fpga/common/fpga_common.h @@ -45,6 +45,7 @@ enum ActivationType { LEAKYRELU = 1, SIGMOID = 2, TANH = 3, + SOFTMAX = 4, }; struct ActivationArgs { @@ -132,7 +133,7 @@ struct DeconvTxParm { #endif struct ConvArgs { - bool relu_enabled; + // bool relu_enabled; void* sb_address; // scale and bias void* filter_address; float* filter_scale_address; @@ -198,7 +199,7 @@ struct PoolingArgs { }; struct EWAddArgs { - bool relu_enabled; + // bool relu_enabled; uint32_t const0; // output0 = const0 x input0 + const1 x input1; uint32_t const1; struct ImageInputArgs image0; @@ -230,7 +231,7 @@ struct DeconvArgs { }; struct DWconvArgs { uint32_t sub_conv_num; - bool relu_enabled; + // bool relu_enabled; void* bias_address; void* filter_address; struct KernelArgs kernel; diff --git a/src/operators/activation_op.cpp b/src/operators/activation_op.cpp index bcff87c9276721c19a970eb328fc0a183ed6c003..76c9e1a014bc0e51b032d8516ba9448fa25b2aa5 100644 --- a/src/operators/activation_op.cpp +++ b/src/operators/activation_op.cpp @@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6); #ifdef SIGMOID_OP DEFINE_ACTIVATION_INFERSHAPE(Sigmoid); +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp); +#endif #endif // SIGMOID_OP #ifdef TANH_OP diff --git a/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h b/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h index 6e8aec99e5f595381efa98e7fb04501c13ddf4de..7eeb7f76670aa5c5a39544484ac92e611ff9066a 100644 --- a/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h @@ -32,6 +32,7 @@ void ConvBNReluBasic(const FusionConvBNReluParam ¶m) { Tensor new_scale = *param.NewScale(); Tensor *output = param.Output(); + output->mutable_data(); int groups = param.Groups(); std::vector strides = param.Strides(); diff --git a/src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h b/src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h index cef297daad3c83253105ccf2c44d195e01d074ae..e0299d00ae09de62c133676449f0148a49beae5e 100644 --- a/src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h +++ b/src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h @@ -32,6 +32,7 @@ void DWConvBNReluBasic(const FusionDWConvBNReluParam ¶m) { Tensor new_scale = *param.NewScale(); Tensor *output = param.Output(); + output->mutable_data(); int groups = param.Groups(); std::vector strides = param.Strides(); diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp index 30ff3155a47c813f303dc59191edd8b60e6d8ce3..3e41efdf76ed5b14d408a1278c7dba0bd1f30a1f 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto bias = param->Bias(); @@ -61,10 +64,10 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp index 7f720323253fff53f7d1bb92f8bfeec77bf0da14..b7b99be78acae80c46b9d1bd1f3cb72d5f4a7cfb 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp @@ -23,7 +23,10 @@ namespace operators { template <> bool ConvAddBNReluKernel::Init( FusionConvAddBNReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto bias = param->Bias(); auto bias_ptr = bias->data(); @@ -64,16 +67,16 @@ bool ConvAddBNReluKernel::Init( if (groups == channel) { fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr); fpga::DWconvArgs dwconv_arg = {0}; - fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled, - strides[0], strides[1], paddings[0], paddings[1], - new_bias_ptr); + fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, strides[0], strides[1], + paddings[0], paddings[1], new_bias_ptr); param->SetFpgaArgs(dwconv_arg); } else { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), strides[0], strides[1], paddings[0], - paddings[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), strides[0], + strides[1], paddings[0], paddings[1], bs_ptr); param->SetFpgaArgs(conv_arg); } return true; diff --git a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp old mode 100755 new mode 100644 index e566dc9b165811a3e8a9f78d040cc8c571fd93a9..153be5a4f888c2a39a7b05b9a7fbb72e305acb8d --- a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ConvAddKernel::Init(FusionConvAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -40,10 +43,10 @@ bool ConvAddKernel::Init(FusionConvAddParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp old mode 100755 new mode 100644 index 6b2a2d77c0df29b4c319061776491b0583157d6f..eef35bf74b6b28e3ec0c49d6b7ace0a350f3f194 --- a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -40,10 +43,10 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp index 492d418b9023a3c4c802da099a5da5ebf5568649..c4c2bf184d536ace31e52defb59e97c154386464 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvBNKernel::Init(FusionConvBNParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto filter = const_cast(param->Filter()); auto out = param->Output(); @@ -53,10 +56,10 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp index 337b25ffa5d3ba00cd60935f8643213cb5ea70d3..463c90d1bb0dcd48a7b41aff73b830d14f989c73 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto filter = const_cast(param->Filter()); auto out = param->Output(); @@ -53,10 +56,10 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp index 1e21d374cb3651e582f43b2875a9c302ae86cdfb..97a4d5516b52939a3a1d90a22c8050679810d405 100644 --- a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp @@ -23,7 +23,10 @@ namespace operators { template <> bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -53,17 +56,18 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; - fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled, + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; - fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(deconv_arg); } diff --git a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp index ca77b2fd440fdfcfb61498205739b3ded6346ebc..f0b29943d7731d716a19cff1e3cfc904d7610c0b 100644 --- a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp @@ -24,7 +24,10 @@ namespace operators { template <> bool DeconvAddReluKernel::Init( FusionDeconvAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -54,17 +57,18 @@ bool DeconvAddReluKernel::Init( fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; - fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled, + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; - fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(deconv_arg); } return true; diff --git a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp index be773412f099410b02f24b1d38d2a44d6ca77689..27eee7e5ba7045473ff035f45236d04e080a692e 100644 --- a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp @@ -20,7 +20,10 @@ namespace operators { template <> bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto *input_x = const_cast(param->InputX()); auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); @@ -30,7 +33,10 @@ bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { auto out_ptr = out->mutable_data(); fpga::EWAddArgs ewaddArgs = {0}; - ewaddArgs.relu_enabled = relu_enabled; + // ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.output.activation.activation_type = activation_enable; + ewaddArgs.output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.image0.address = input_x_ptr; diff --git a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp index 541bb6126509dc7da59fa6bed5c46aff3442928b..fbbe679d4b6a6d4b0ca0a25ebb7aacf93a133943 100644 --- a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ElementwiseAddReluKernel::Init( ElementwiseAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto *input_x = const_cast(param->InputX()); auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); @@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel::Init( auto out_ptr = out->mutable_data(); fpga::EWAddArgs ewaddArgs = {0}; - ewaddArgs.relu_enabled = relu_enabled; + // ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.output.activation.activation_type = activation_enable; + ewaddArgs.output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.image0.address = input_x_ptr; diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp index e6e4591168b90cbe19b207cd9e77eaf5cd07de80..c00bdf57a259e24669c33f011d7b77eb20d4b308 100644 --- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp @@ -19,12 +19,34 @@ namespace operators { template <> bool FetchKernel::Init(FetchParam *param) { + Tensor *output = param->Out(); + // fpga::format_fp16_ofm(output); return true; } template <> void FetchKernel::Compute(const FetchParam ¶m) { param.Out()->ShareDataWith(*(param.InputX())); + /*auto input = + reinterpret_cast(const_cast(param.InputX())); + fpga::format_image(input); + auto input_ptr = input->data(); + Tensor *output = param.Out(); + auto output_ptr = output->data(); + + fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; + + args.input_data_type = fpga::DATA_TYPE_FP16; + args.output_data_type = fpga::DATA_TYPE_FP32; + args.input_layout_type = fpga::LAYOUT_CHW; + args.output_layout_type = fpga::LAYOUT_HWC; + args.image.address = reinterpret_cast(input_ptr); + args.image.channels = (uint32_t)input->dims()[1]; + args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : + 1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] + : 1; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address + = output_ptr; args.output.scale_address = output->scale; + fpga::PerformBypass(args);*/ } template class FetchKernel; diff --git a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp index 9258fb90e1e6bf9a597a387843ce781858628139..fadeae324ff8f5160bc5ff410c2e02b09539a01e 100644 --- a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp @@ -20,7 +20,10 @@ namespace operators { template <> bool FusionFcKernel::Init(FusionFcParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input_x = const_cast(param->InputX()); auto filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); @@ -55,8 +58,8 @@ bool FusionFcKernel::Init(FusionFcParam *param) { fpga::format_fp16_ofm(out); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, - 0, 0, bs_ptr); + fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable, + leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/reshape_kernel.cpp b/src/operators/kernel/fpga/V1/reshape_kernel.cpp index f5495e6d005f7f7c14ebd3d290ea9be02b9f0951..5e01bb74bab6996ca59632ae31f37ecfeafc918c 100644 --- a/src/operators/kernel/fpga/V1/reshape_kernel.cpp +++ b/src/operators/kernel/fpga/V1/reshape_kernel.cpp @@ -22,6 +22,12 @@ namespace operators { template <> bool ReshapeKernel::Init(ReshapeParam *param) { param->Out()->ShareDataWith(*param->InputX()); + const int in_n = param->InputX()->dims()[0]; + const int in_c = param->InputX()->dims()[1]; + const int in_h = param->InputX()->dims()[2]; + const int in_w = param->InputX()->dims()[3]; + auto out = param->Out(); + out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w})); return true; } diff --git a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp index 276a8fef62edfabfabb116fada145eedbf23ffa3..6c836e2776891f283677287eae54019f0dbef39b 100644 --- a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp +++ b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp @@ -15,73 +15,41 @@ limitations under the License. */ #ifdef SIGMOID_OP #include "operators/kernel/activation_kernel.h" + namespace paddle_mobile { namespace operators { -using framework::DDim; -using framework::Tensor; - template <> bool SigmoidKernel::Init(SigmoidParam *param) { + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::SIGMOID; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->InputX()); auto input_ptr = input->data(); auto out = param->Out(); - fpga::format_fp32_ofm(out); + fpga::format_fp16_ofm(out); - auto float_input = new Tensor; - if (input->dims().size() == 2) { - float_input->mutable_data({1, input->dims()[1]}); - } else if (input->dims().size() == 4) { - float_input->mutable_data( - {1, input->dims()[2], input->dims()[3], input->dims()[1]}); - } else { - DLOG << "wrong dimension of softmax input"; - } - - fpga::format_fp32_ofm(float_input); fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; - args.input_layout_type = fpga::LAYOUT_HWC; - args.output_layout_type = fpga::LAYOUT_CHW; args.input_data_type = fpga::DATA_TYPE_FP16; - args.output_data_type = fpga::DATA_TYPE_FP32; + args.output_data_type = fpga::DATA_TYPE_FP16; args.image.address = input_ptr; args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1; args.image.channels = (uint32_t)input->dims()[1]; - args.output.address = float_input->data(); - args.output.scale_address = float_input->scale; - param->SetFloatInput(float_input); + args.output.address = out->data(); + args.output.scale_address = out->scale; + args.output.activation.activation_type = activation_enable; + args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope; param->SetFpgaArgs(args); - return true; } -template -T Sigmoid(const T a) { - T tmp = -1.0f * a; - return (1.0 / (1.0 + exp(tmp))); -} -template -void sigmoidFuntor(Tensor *input, Tensor *output) { - auto *input_ptr = input->data(); - auto *output_ptr = output->mutable_data(); - for (int i = 0; i < input->numel(); i++) { - *(output_ptr + i) = Sigmoid(*(input_ptr + i)); - } -} template <> void SigmoidKernel::Compute(const SigmoidParam ¶m) { - Tensor *in_x = param.FloatInput(); - Tensor *out = param.Out(); - fpga::PerformBypass(param.FpgaArgs()); - fpga::fpga_invalidate((void *)in_x->data(), // NOLINT - in_x->numel() * sizeof(float)); - // TODO: In general case, 0 should be squeezed before softmax input // NOLINT - sigmoidFuntor(in_x, out); - fpga::fpga_flush(out->data(), out->memory_size()); } + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/V1/softmax_kernel.cpp b/src/operators/kernel/fpga/V1/softmax_kernel.cpp index e5ada795b120c1438688089078be20e03f078cbb..2698fdece49409aec017112e8613a706c248cf48 100644 --- a/src/operators/kernel/fpga/V1/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/V1/softmax_kernel.cpp @@ -26,7 +26,6 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { auto input_ptr = input->data(); auto out = param->Out(); fpga::format_fp32_ofm(out); - auto float_input = new Tensor; if (input->dims().size() == 2) { float_input->mutable_data({1, input->dims()[1]}); @@ -36,7 +35,6 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { } else { DLOG << "wrong dimension of softmax input"; } - fpga::format_fp32_ofm(float_input); fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; args.input_layout_type = fpga::LAYOUT_HWC; @@ -53,6 +51,7 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { args.output.scale_address = float_input->scale; param->SetFloatInput(float_input); param->SetFpgaArgs(args); + return true; } diff --git a/src/operators/math/depthwise_conv3x3.cpp b/src/operators/math/depthwise_conv3x3.cpp index ab47126329d5f5c9b8607250dff086a31466fcec..8220e20429ef3b26acb1f0f130ecd41f2954a3c2 100644 --- a/src/operators/math/depthwise_conv3x3.cpp +++ b/src/operators/math/depthwise_conv3x3.cpp @@ -564,7 +564,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const framework::Tensor *input, #if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); - float *output_data = output->data(); + float *output_data = output->mutable_data(); const float *newscale_data = new_scale->data(); const float *newbias_data = new_bias->data(); @@ -1309,7 +1309,7 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input, #if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); - float *output_data = output->data(); + float *output_data = output->mutable_data(); const float *bias_data; if (if_bias) { bias_data = bias->data(); @@ -1729,7 +1729,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const framework::Tensor *input, const float *input_data = input->data(); const float *filter_data = filter->data(); - float *output_data = output->data(); + float *output_data = output->mutable_data(); const float *newscale_data = new_scale->data(); const float *newbias_data = new_bias->data(); @@ -1978,6 +1978,7 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input, const int output_width = static_cast(output->dims()[3]); const int inhxw = input_height * input_width; const int outhxw = output_height * output_width; + output->mutable_data(); float32x4_t zero = vdupq_n_f32(0.0); for (int b = 0; b < batch_size; b++) { diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 5a4627625c7b2e8f86a1012875c8f885118ed4c8..53bbb56d70ca214ca66d10d36b0565fabdff8afe 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - std::shared_ptr float_input_x_; fpga::BypassArgs fpga_bypass_args; public: - RType *FloatInput() const { - return float_input_x_ == nullptr ? input_x_ : float_input_x_.get(); - } - void SetFloatInput(Tensor *input) { float_input_x_.reset(input); } const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; } void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; } #endif @@ -1214,6 +1209,20 @@ class FetchParam : public OpParam { private: RType *input_x_; Tensor *out_; +#ifdef PADDLE_MOBILE_FPGA + + private: + std::shared_ptr float_input_x_; + fpga::BypassArgs fpga_bypass_args; + + public: + RType *FloatInput() const { + return float_input_x_ == nullptr ? input_x_ : float_input_x_.get(); + } + void SetFloatInput(Tensor *input) { float_input_x_.reset(input); } + const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; } + void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; } +#endif }; #ifdef FILL_CONSTANT_OP diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index febc762889c21bb392a0f1e64c2ed415eabc6011..f3dffbad1c065561d86da0e976792d206198c61e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -337,8 +337,8 @@ if (NOT FOUND_MATCH) target_link_libraries(test-genet paddle-mobile) # gen test - ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) - target_link_libraries(test-sigmoid paddle-mobile) + ADD_EXECUTABLE(test-sigmoid-op operators/test_sigmoid_op.cpp test_include.h) + target_link_libraries(test-sigmoid-op paddle-mobile) # gen test ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) @@ -408,14 +408,14 @@ if (NOT FOUND_MATCH) ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h) target_link_libraries(test-ocr paddle-mobile) - ADD_EXECUTABLE(test-sequence-expand operators/test_sequence_expand_op.cpp test_helper.h test_include.h) - target_link_libraries(test-sequence-expand paddle-mobile) + ADD_EXECUTABLE(test-sequence-expand-op operators/test_sequence_expand_op.cpp test_helper.h test_include.h) + target_link_libraries(test-sequence-expand-op paddle-mobile) - ADD_EXECUTABLE(test-sequence-pool operators/test_sequence_pool_op.cpp test_helper.h test_include.h) - target_link_libraries(test-sequence-pool paddle-mobile) + ADD_EXECUTABLE(test-sequence-pool-op operators/test_sequence_pool_op.cpp test_helper.h test_include.h) + target_link_libraries(test-sequence-pool-op paddle-mobile) - ADD_EXECUTABLE(test-sequence-softmax operators/test_sequence_softmax_op.cpp test_helper.h test_include.h) - target_link_libraries(test-sequence-softmax paddle-mobile) + ADD_EXECUTABLE(test-sequence-softmax-op operators/test_sequence_softmax_op.cpp test_helper.h test_include.h) + target_link_libraries(test-sequence-softmax-op paddle-mobile) # gen test ADD_EXECUTABLE(test-vgg16ssd net/test_vgg16ssd.cpp test_helper.h test_include.h) @@ -445,4 +445,9 @@ if (NOT FOUND_MATCH) ADD_EXECUTABLE(test-is-empty-op operators/test_is_empty_op.cpp test_helper.h test_include.h) target_link_libraries(test-is-empty-op paddle-mobile) + ADD_EXECUTABLE(test-conv-bn-relu-op operators/test_conv_bn_relu_op.cpp test_helper.h test_include.h) + target_link_libraries(test-conv-bn-relu-op paddle-mobile) + + ADD_EXECUTABLE(test-dwconv-bn-relu-op operators/test_dwconv_bn_relu_op.cpp test_helper.h test_include.h) + target_link_libraries(test-dwconv-bn-relu-op paddle-mobile) endif () diff --git a/test/operators/test_conv_bn_relu_op.cpp b/test/operators/test_conv_bn_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6a09d838e0a30486569448726c255b1a6ba7f617 --- /dev/null +++ b/test/operators/test_conv_bn_relu_op.cpp @@ -0,0 +1,172 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "../test_helper.h" +#include "../test_include.h" +#include "operators/fusion_conv_bn_relu_op.h" + +namespace paddle_mobile { + +// Reference convolution from Caffe for checking results. +// accumulate through explicit loops over input, output, and filters. +template +int TestConvBnReluOp(int in_channels, int in_height, int in_width, + int out_channels, int groups, std::string opname) { + int kernel_h = Kernel; + int kernel_w = Kernel; + int pad_h = Pad; + int pad_w = Pad; + int stride_h = Stride; + int stride_w = Stride; + int dilation_h = 1; + int dilation_w = 1; + + int batch_size = 1; + int input_c = in_channels; + int input_h = in_height; + int input_w = in_width; + int output_c = out_channels; + framework::DDim input_shape = + framework::make_ddim({batch_size, input_c, input_h, input_w}); + framework::DDim filter_shape = + framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w}); + framework::DDim shape = framework::make_ddim({output_c}); + + VariableNameMap inputs; + VariableNameMap outputs; + auto scope = std::make_shared(); + inputs["Input"] = std::vector({"input"}); + inputs["Filter"] = std::vector({"filter"}); + outputs["Out"] = std::vector({"output"}); + inputs["Mean"] = std::vector({"input_mean"}); + inputs["Variance"] = std::vector({"input_variance"}); + inputs["Scale"] = std::vector({"input_scale"}); + inputs["Bias"] = std::vector({"input_bias"}); + auto input_var = scope.get()->Var("input"); + auto input = input_var->template GetMutable(); + SetupTensor(input, input_shape, -20.0, 20.0); + + auto filter_var = scope.get()->Var("filter"); + auto filter = filter_var->template GetMutable(); + SetupTensor(filter, filter_shape, -20, 20); + + auto input_mean_var = scope.get()->Var("input_mean"); + auto input_mean = input_mean_var->template GetMutable(); + SetupTensor(input_mean, shape, -10.0, 10.0); + auto vari_var = scope.get()->Var("input_variance"); + auto vari = vari_var->template GetMutable(); + SetupTensor(vari, shape, -10.0, 10.0); + auto scale_var = scope.get()->Var("input_scale"); + auto scale = scale_var->template GetMutable(); + SetupTensor(scale, shape, -10.0, 10.0); + auto input_bias_var = scope.get()->Var("input_bias"); + auto input_bias = input_bias_var->template GetMutable(); + SetupTensor(input_bias, shape, -10.0, 10.0); + + auto output_var = scope.get()->Var("output"); + framework::AttributeMap attrs; + attrs["strides"].Set>(std::vector({stride_h, stride_w})); + attrs["paddings"].Set>(std::vector({pad_h, pad_w})); + attrs["dilations"].Set>( + std::vector({dilation_h, dilation_w})); + attrs["groups"].Set(groups); + attrs["epsilon"].Set(1e-6); + attrs["momentum"].Set(0.f); + auto *op = new operators::FusionConvBNReluOp( + "fusion_conv_bn_relu", inputs, outputs, attrs, scope); + op->InferShape(); + op->Init(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time1 = time(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time2 = time(); + std::ofstream out_file("./out_conv.txt", std::ios::app); + out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms" + << std::endl; + out_file.close(); + + delete op; + return 0; +} + +} // namespace paddle_mobile + +int main(int argc, char *argv[]) { + // kernel = 3, pad = 1, stride = 2 + paddle_mobile::TestConvBnReluOp(3, 48, 48, 16, 1, + "conv_bn_relu"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(16, 24, 24, 8, 1, + "depthwise_seperable"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(8, 24, 24, 24, 1, + "MBConv_3x3_conv1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(24, 24, 24, 8, 1, + "MBConv_3x3_pw1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(8, 24, 24, 24, 1, + "MBConv_3x3_conv2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(24, 24, 24, 8, 1, + "MBConv_3x3_pw2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(8, 24, 24, 24, 1, + "MBConv_3x3_conv3"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp(24, 12, 12, 16, 1, + "MBConv_3x3_pw3"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv3"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 48, 6, 6, 32, 1, "MBConv_5x5_stage1_pw3"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw1"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw2"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv3"); + // kernel = 1, pad = 0, stride = 1 + paddle_mobile::TestConvBnReluOp( + 192, 6, 6, 64, 1, "MBConv_5x5_stage2_pw3"); + + return 0; +} diff --git a/test/operators/test_conv_op.cpp b/test/operators/test_conv_op.cpp index c596c1def4006853532395f151c6e9c47cf8e3e8..3a949daefeb89df1c72702f1207a0d0f0e652f93 100644 --- a/test/operators/test_conv_op.cpp +++ b/test/operators/test_conv_op.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include "../test_helper.h" #include "../test_include.h" #include "operators/conv_op.h" @@ -209,10 +210,10 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels, // PADDLE_MOBILE_ENFORCE(std::abs(gap / (output_data[i] + 1e-5)) < 1e-3, // "output[%d] = %d, output_cmp[%d] = %d", i, // output_data[i], i, output_cmp_data[i]); - if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { - LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i] - << ", output_cmp_data[" << i - << "] = " << output_cmp_data[i]; + if (gap > 1e-2 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { + std::cerr << "output_data[" << i << "] = " << output_data[i] + << ", output_cmp_data[" << i << "] = " << output_cmp_data[i] + << std::endl; exit(1); } } @@ -222,94 +223,131 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels, } // namespace paddle_mobile -int main(int argc, char *argv[]) { - if (argc < 5) { - LOG(paddle_mobile::kLOG_INFO) - << "Usage:\n" - << " ./test-int8-conv-op in_channels in_height in_width out_channels " - "[groups]\n" - << " params:\n" - << " -in_channels: int, input image's channels\n" - << " -in_height: int, input image's height\n" - << " -in_width: int, input image's width\n" - << " -out_channels: int, conv output channels\n"; - return 1; - } - int in_channels = atoi(argv[1]); - int in_height = atoi(argv[2]); - int in_width = atoi(argv[3]); - int out_channels = atoi(argv[4]); - int groups = 1; - if (argc == 6) { - groups = atoi(argv[5]); - } +int TestAll(const int in_channels, const int in_height, const int in_width, + const int out_channels, const int groups) { + std::cerr << "in_channels=" << in_channels << ", in_height=" << in_height + << ", in_width=" << in_width << ", out_channels=" << out_channels + << ", groups=" << groups << std::endl; + // // kernel = 3, pad = 0, stride = 1 + // std::cerr << "float, kernel=3, pad=0, stride=1" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 1, stride = 1 + // std::cerr << "float, kernel=3, pad=1, stride=1" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 2, stride = 1 + // std::cerr << "float, kernel=3, pad=2, stride=1" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 5, stride = 1 + // std::cerr << "float, kernel=3, pad=5, stride=1" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // + // // kernel = 3, pad = 0, stride = 2 + // std::cerr << "float, kernel=3, pad=0, stride=2" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 1, stride = 2 + // std::cerr << "float, kernel=3, pad=1, stride=2" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 2, stride = 2 + // std::cerr << "float, kernel=3, pad=2, stride=2" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + // // kernel = 3, pad = 5, stride = 2 + // std::cerr << "float, kernel=3, pad=5, stride=2" << std::endl; + // paddle_mobile::TestConvOp( + // in_channels, in_height, in_width, out_channels, groups); + +#ifndef __aarch64__ // kernel = 3, pad = 0, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=1"; + std::cerr << "int8, kernel=3, pad=0, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 1, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=1"; + std::cerr << "int8, kernel=3, pad=1, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 2, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=1"; + std::cerr << "int8, kernel=3, pad=2, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 5, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=1"; + std::cerr << "int8, kernel=3, pad=5, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 0, stride = 2 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=2"; + std::cerr << "int8, kernel=3, pad=0, stride=2" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 1, stride = 2 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=2"; + std::cerr << "int8, kernel=3, pad=1, stride=2" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 2, stride = 2 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=2"; + std::cerr << "int8, kernel=3, pad=2, stride=2" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 3, pad = 5, stride = 2 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=2"; + std::cerr << "int8, kernel=3, pad=5, stride=2" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); +#endif // __aarch64__ // kernel = 5, pad = 0, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=0, stride=1"; + std::cerr << "float, kernel=5, pad=0, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 1, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=1, stride=1"; + std::cerr << "float, kernel=5, pad=1, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 2, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=2, stride=1"; + std::cerr << "float, kernel=5, pad=2, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 5, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=5, stride=1"; + std::cerr << "float, kernel=5, pad=5, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); +#ifndef __aarch64__ // kernel = 5, pad = 0, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1"; + std::cerr << "int8, kernel=5, pad=0, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 1, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=1, stride=1"; + std::cerr << "int8, kernel=5, pad=1, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 2, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1"; + std::cerr << "int8, kernel=5, pad=2, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); // kernel = 5, pad = 5, stride = 1 - LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=5, stride=1"; + std::cerr << "int8, kernel=5, pad=5, stride=1" << std::endl; paddle_mobile::TestConvOp( in_channels, in_height, in_width, out_channels, groups); +#endif // __aarch64__ + + return 0; +} + +int main() { + TestAll(1, 5, 5, 1, 1); + TestAll(1, 5, 5, 10, 1); + TestAll(10, 5, 5, 10, 10); + + TestAll(5, 33, 33, 5, 1); + TestAll(5, 33, 33, 13, 1); + TestAll(13, 33, 33, 13, 13); + TestAll(5, 33, 13, 5, 1); + TestAll(5, 33, 13, 13, 1); + TestAll(13, 33, 13, 13, 13); return 0; } diff --git a/test/operators/test_dwconv_bn_relu_op.cpp b/test/operators/test_dwconv_bn_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7fcf10d903e571ac7b0f5fb0a4b1214bf55327d1 --- /dev/null +++ b/test/operators/test_dwconv_bn_relu_op.cpp @@ -0,0 +1,145 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "../test_helper.h" +#include "../test_include.h" +#include "operators/fusion_dwconv_bn_relu_op.h" + +namespace paddle_mobile { + +template +int TestDWConvAddBnReluOp(int in_channels, int in_height, int in_width, + int out_channels, int groups, std::string opname) { + int kernel_h = Kernel; + int kernel_w = Kernel; + int pad_h = Pad; + int pad_w = Pad; + int stride_h = Stride; + int stride_w = Stride; + int dilation_h = 1; + int dilation_w = 1; + + int batch_size = 1; + int input_c = in_channels; + int input_h = in_height; + int input_w = in_width; + int output_c = out_channels; + framework::DDim input_shape = + framework::make_ddim({batch_size, input_c, input_h, input_w}); + framework::DDim filter_shape = + framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w}); + framework::DDim shape = framework::make_ddim({output_c}); + + VariableNameMap inputs; + VariableNameMap outputs; + auto scope = std::make_shared(); + inputs["Input"] = std::vector({"input"}); + inputs["Filter"] = std::vector({"filter"}); + inputs["Mean"] = std::vector({"mean"}); + inputs["Variance"] = std::vector({"variance"}); + inputs["Scale"] = std::vector({"scale"}); + inputs["Bias"] = std::vector({"bias"}); + outputs["Out"] = std::vector({"output"}); + + auto input_var = scope.get()->Var("input"); + auto input = input_var->template GetMutable(); + SetupTensor(input, input_shape, -20.0, 20.0); + + auto filter_var = scope.get()->Var("filter"); + auto filter = filter_var->template GetMutable(); + SetupTensor(filter, filter_shape, -20, 20); + + auto mean_var = scope.get()->Var("mean"); + auto mean = mean_var->template GetMutable(); + SetupTensor(mean, shape, -10.0, 10.0); + + auto vari_var = scope.get()->Var("variance"); + auto vari = vari_var->template GetMutable(); + SetupTensor(vari, shape, -10.0, 10.0); + + auto scale_var = scope.get()->Var("scale"); + auto scale = scale_var->template GetMutable(); + SetupTensor(scale, shape, -10.0, 10.0); + + auto bias_var = scope.get()->Var("bias"); + auto bias = bias_var->template GetMutable(); + SetupTensor(bias, shape, -10.0, 10.0); + + auto output_var = scope.get()->Var("output"); + framework::AttributeMap attrs; + attrs["strides"].Set>(std::vector({stride_h, stride_w})); + attrs["paddings"].Set>(std::vector({pad_h, pad_w})); + attrs["dilations"].Set>( + std::vector({dilation_h, dilation_w})); + attrs["groups"].Set(groups); + attrs["epsilon"].Set(1e-6); + attrs["momentum"].Set(0.f); + + auto *op = new operators::FusionDWConvBNReluOp( + "fusion_dwconv_bn_relu", inputs, outputs, attrs, scope); + op->InferShape(); + op->Init(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time1 = time(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time2 = time(); + std::ofstream out_file("./out_dwconv.txt", std::ios::app); + out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms" + << std::endl; + out_file.close(); + + delete op; + return 0; +} + +} // namespace paddle_mobile + +int main(int argc, char *argv[]) { + // kernel = 3, pad = 1, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 16, 24, 24, 16, 16, "depthwise_seperable"); + // kernel = 3, pad = 1, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 24, 24, 24, 24, 24, "MBConv_3x3_dw1"); + // kernel = 3, pad = 1, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 24, 24, 24, 24, 24, "MBConv_3x3_dw2"); + // kernel = 3, pad = 1, stride = 2 + paddle_mobile::TestDWConvAddBnReluOp( + 24, 24, 24, 24, 24, "MBConv_3x3_dw3"); + // kernel = 5, pad = 2, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw1"); + // kernel = 5, pad = 2, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw2"); + // kernel = 5, pad = 2, stride = 2 + paddle_mobile::TestDWConvAddBnReluOp( + 48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw3"); + // kernel = 5, pad = 2, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw1"); + // kernel = 5, pad = 2, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw2"); + // kernel = 5, pad = 2, stride = 1 + paddle_mobile::TestDWConvAddBnReluOp( + 192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw3"); + + return 0; +} diff --git a/test/operators/test_gru_op.cpp b/test/operators/test_gru_op.cpp index f2ce833661bfd1b3d751a7ac2d54cfb70114a6c6..b11ec4f5f77aca2c4997153863e70b1a6b209c32 100644 --- a/test/operators/test_gru_op.cpp +++ b/test/operators/test_gru_op.cpp @@ -12,18 +12,89 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "../test_helper.h" #include "../test_include.h" #include "operators/gru_op.h" -int main() { - paddle_mobile::framework::Loader loader; - auto program = loader.Load(g_nlp); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); +namespace paddle_mobile { - Executor4Test> - executor(program, "gru"); +template +int TestGruOp(int in_channels, int out_channels, std::string opname) { + int input_c = in_channels; + int output_c = out_channels; + paddle_mobile::framework::LoD lod{{0, input_c}}; + int batch_size = lod.size(); + framework::DDim input_shape = framework::make_ddim({input_c, output_c * 3}); + framework::DDim weight_shape = framework::make_ddim({output_c, output_c * 3}); + framework::DDim h0_shape = framework::make_ddim({batch_size, output_c}); + framework::DDim bias_shape = framework::make_ddim({batch_size, output_c * 3}); + VariableNameMap inputs; + VariableNameMap outputs; + auto scope = std::make_shared(); + inputs["Input"] = std::vector({"input"}); + inputs["Weight"] = std::vector({"weight"}); + inputs["H0"] = std::vector({"h0"}); + inputs["Bias"] = std::vector({"bias"}); + + outputs["BatchGate"] = std::vector({"output_batch_gate"}); + outputs["BatchResetHiddenPrev"] = + std::vector({"output_batch_reset_hidden_prev"}); + outputs["BatchHidden"] = std::vector({"output_batch_hidden"}); + outputs["Hidden"] = std::vector({"output_hidden"}); + + auto input_var = scope.get()->Var("input"); + auto input = input_var->template GetMutable(); + SetupTensor(input, input_shape, -127, 127); + input->set_lod(lod); + + auto weight_var = scope.get()->Var("weight"); + auto weight = weight_var->template GetMutable(); + SetupTensor(weight, weight_shape, -127, 127); + + auto h0_var = scope.get()->Var("h0"); + auto h0 = h0_var->template GetMutable(); + SetupTensor(h0, h0_shape, -127, 127); + + auto bias_var = scope.get()->Var("bias"); + auto bias = bias_var->template GetMutable(); + SetupTensor(bias, bias_shape, -127, 127); + + auto batch_gate_var = scope.get()->Var("output_batch_gate"); + auto batch_reset_hidden_prev_var = + scope.get()->Var("output_batch_reset_hidden_prev"); + auto batch_hidden_var = scope.get()->Var("output_batch_hidden"); + auto hidden_var = scope.get()->Var("output_hidden"); + + framework::AttributeMap attrs; + attrs["activation"].SetString(std::string("relu")); + attrs["gate_activation"].SetString(std::string("sigmoid")); + attrs["is_reverse"].Set(false); + + auto *op = + new operators::GruOp("gru", inputs, outputs, attrs, scope); + op->InferShape(); + op->Init(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time1 = time(); + for (int i = 0; i < 10; ++i) { + op->Run(); + } + auto time2 = time(); + std::ofstream out_file("./out_gru.txt", std::ios::app); + out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms" + << std::endl; + out_file.close(); + + delete op; + return 0; +} + +} // namespace paddle_mobile + +int main(int argc, char *argv[]) { + paddle_mobile::TestGruOp(384, 120, "gru_forward"); return 0; } diff --git a/test/operators/test_log_op.cpp b/test/operators/test_log_op.cpp index 8d675f06decc902365c32d797b432923933656f7..2f29e8711bb8de0e576a9a1485d96a448ec3d3c0 100644 --- a/test/operators/test_log_op.cpp +++ b/test/operators/test_log_op.cpp @@ -76,6 +76,5 @@ int main() { paddle_mobile::TestLogOp({1, 1, 2, 3}); paddle_mobile::TestLogOp({1, 3, 11, 22}); paddle_mobile::TestLogOp({1, 32, 112, 112}); - std::cout << "test log op pass." << std::endl; return 0; } diff --git a/test/operators/test_quantize_op.cpp b/test/operators/test_quantize_op.cpp index 50c0e7bd05da7f7a5ee1fd6912be0eff2f6e2958..f3b8fd151c83d115b003b226549ba351188808da 100644 --- a/test/operators/test_quantize_op.cpp +++ b/test/operators/test_quantize_op.cpp @@ -92,18 +92,10 @@ static float find_abs_max(const Tensor *input) { return max_abs; } -int TestQuqntizeOp(int argc, char *argv[]) { - if (argc < 5) { - std::cout << "Usage: ./test-quantize-op batch_size channel height width" - << std::endl; - return 1; - } - int batch_size = atoi(argv[1]); - int channel = atoi(argv[2]); - int height = atoi(argv[3]); - int width = atoi(argv[4]); - std::cout << "batch_size: " << batch_size << ", channel: " << channel - << ", height: " << height << ", width: " << width << std::endl; +int TestQuqntizeOp(const int batch_size, const int channel, const int height, + const int width) { + DLOG << "batch_size: " << batch_size << ", channel: " << channel + << ", height: " << height << ", width: " << width; framework::DDim dim = framework::make_ddim({batch_size, channel, height, width}); @@ -140,9 +132,7 @@ int TestQuqntizeOp(int argc, char *argv[]) { framework::Tensor output_cmp; output_cmp.Resize(output->dims()); float scale = 127 / output_scale_cmp; - // quantize(input, scale, &output_cmp); - // quantize(input, scale, &output_cmp); - quantize(input, scale, &output_cmp); + quantize(input, scale, &output_cmp); int8_t *output_cmp_data = output_cmp.data(); for (int i = 0; i < output->numel(); ++i) { PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i], @@ -157,5 +147,7 @@ int TestQuqntizeOp(int argc, char *argv[]) { } // namespace paddle_mobile int main(int argc, char *argv[]) { - return paddle_mobile::TestQuqntizeOp(argc, argv); + TestQuqntizeOp(1, 10, 10, 5); + TestQuqntizeOp(1, 111, 111, 5); + TestQuqntizeOp(5, 111, 111, 5); } diff --git a/test/operators/test_sequence_pool_op.cpp b/test/operators/test_sequence_pool_op.cpp index a8518d630a6008c7cd1fa99d2b0df1d27ebfba32..3b377aa437b8a37041e3f30d299214e19c48ff4e 100644 --- a/test/operators/test_sequence_pool_op.cpp +++ b/test/operators/test_sequence_pool_op.cpp @@ -59,7 +59,7 @@ int TestSequencePoolOp(const framework::LoDTensor &input_x, int main(int argc, char *argv[]) { framework::LoDTensor input_x, output; // case 1 - std::cerr << "running max case 1" << std::endl; + DLOG << "running max case 1"; { std::vector data{1, 2, 3, 4}; input_x.Resize(framework::make_ddim({4, 1})); @@ -71,14 +71,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{2, 4}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 2 - std::cerr << "running max case 2" << std::endl; + DLOG << "running max case 2"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; input_x.Resize(framework::make_ddim({data.size(), 1})); @@ -90,13 +90,13 @@ int main(int argc, char *argv[]) { std::vector expect_data{3, 10}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } - std::cerr << "running max case 3" << std::endl; + DLOG << "running max case 3"; // case 3 { std::vector data{1, 2, 3, 4, 5, 6, 7, 8}; @@ -109,14 +109,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{3, 4, 7, 8}; for (int i = 0; i < 4; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 4 - std::cerr << "running max case 4" << std::endl; + DLOG << "running max case 4"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; @@ -129,14 +129,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{6, 7, 8, 9, 10, 16, 17, 18, 19, 20}; for (int i = 0; i < 10; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 1 - std::cerr << "running sum case 1" << std::endl; + DLOG << "running sum case 1"; { std::vector data{1, 2, 3, 4}; input_x.Resize(framework::make_ddim({4, 1})); @@ -148,14 +148,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{3, 7}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 2 - std::cerr << "running sum case 2" << std::endl; + DLOG << "running sum case 2"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; input_x.Resize(framework::make_ddim({data.size(), 1})); @@ -167,14 +167,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{6, 49}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 3 - std::cerr << "running sum case 3" << std::endl; + DLOG << "running sum case 3"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8}; input_x.Resize(framework::make_ddim({4, 2})); @@ -186,14 +186,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{4, 6, 12, 14}; for (int i = 0; i < 4; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 4 - std::cerr << "running sum case 4" << std::endl; + DLOG << "running sum case 4"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; @@ -206,14 +206,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{7, 9, 11, 13, 15, 27, 29, 31, 33, 35}; for (int i = 0; i < 10; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 1 - std::cerr << "running first case 1" << std::endl; + DLOG << "running first case 1"; { std::vector data{1, 2, 3, 4}; input_x.Resize(framework::make_ddim({4, 1})); @@ -225,14 +225,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{1, 3}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 2 - std::cerr << "running first case 2" << std::endl; + DLOG << "running first case 2"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; input_x.Resize(framework::make_ddim({data.size(), 1})); @@ -244,14 +244,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{1, 4}; for (int i = 0; i < 2; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 3 - std::cerr << "running first case 3" << std::endl; + DLOG << "running first case 3"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8}; input_x.Resize(framework::make_ddim({4, 2})); @@ -263,14 +263,14 @@ int main(int argc, char *argv[]) { std::vector expect_data{1, 2, 5, 6}; for (int i = 0; i < 4; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } } // case 4 - std::cerr << "running first case 4" << std::endl; + DLOG << "running first case 4"; { std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; @@ -283,8 +283,8 @@ int main(int argc, char *argv[]) { std::vector expect_data{1, 2, 3, 4, 5, 11, 12, 13, 14, 15}; for (int i = 0; i < 10; ++i) { if (output.data()[i] != expect_data[i]) { - std::cerr << "output[" << i << "]: " << output.data()[i] - << " != expect[" << i << "]: " << expect_data[i] << std::endl; + DLOG << "output[" << i << "]: " << output.data()[i] + << " != expect[" << i << "]: " << expect_data[i]; return 1; } } diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp index 40f6461a2cfdfb67b135a5a3a22c29bf19750189..260dd62781ad18b46e78db3cfaccf1fe27797175 100644 --- a/test/operators/test_sigmoid_op.cpp +++ b/test/operators/test_sigmoid_op.cpp @@ -76,6 +76,5 @@ int main() { paddle_mobile::TestSigmoidOp({1, 1, 2, 3}); paddle_mobile::TestSigmoidOp({1, 3, 11, 22}); paddle_mobile::TestSigmoidOp({1, 32, 112, 112}); - std::cout << "test sigmoid op pass." << std::endl; return 0; } diff --git a/test/operators/test_tanh_op.cpp b/test/operators/test_tanh_op.cpp index b8006931075d742724d18c3af3627f780a7bf454..d013b0eedfbe3bdc773e263aad594c89212ad6ce 100644 --- a/test/operators/test_tanh_op.cpp +++ b/test/operators/test_tanh_op.cpp @@ -58,7 +58,7 @@ int TestTanhOp(const std::vector input_shape) { const float *output_data = output->data(); for (int i = 0; i < output->numel(); ++i) { float gap = output_data[i] - output_cmp_data[i]; - if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { + if (gap > 1e-5 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) { LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i] << ", output_cmp_data[" << i << "] = " << output_cmp_data[i]; diff --git a/tools/ci_build.sh b/tools/ci_build.sh index f21bcdc67e6f73f61e0b33558672ac61fdf0fb22..d725afe4595b8e88578ec6c2f0f3c78bc0807a1b 100755 --- a/tools/ci_build.sh +++ b/tools/ci_build.sh @@ -15,6 +15,7 @@ # limitations under the License. set -e +source ./ci_run_test.sh function print_usage() { echo "\n${RED}Usage${NONE}: @@ -231,6 +232,11 @@ function build_linux_fpga() { docker build -t paddle-mobile:dev - < Dockerfile fi docker run --rm -v `pwd`:/workspace paddle-mobile:dev bash /workspace/tools/docker_build_fpga.sh + cd - +} + +function run_android_test() { + ExecuteAndroidTests $1 } function main() { @@ -239,9 +245,11 @@ function main() { case $CMD in android_armv7) build_android_armv7 + run_android_test armeabi-v7a ;; android_armv8) build_android_armv8 + run_android_test arm64-v8a ;; ios) build_ios diff --git a/tools/ci_run_test.sh b/tools/ci_run_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..6470a97b15a4497cf933ff0a22befa34383dd890 --- /dev/null +++ b/tools/ci_run_test.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +operators= + +function AddTest() { + operators="${operators} $1" +} + +function ExecuteAndroidTests() { + platform=$1 + devices=`adb devices | grep -v devices | grep device | awk -F ' ' '{print $1}'` + for device in ${devices}; do + adb -s ${device} shell rm -rf /data/local/tmp/* + adb -s ${device} push ../build/${platform}/build/libpaddle-mobile.so /data/local/tmp/ + for op in ${operators}; do + adb -s ${device} push ../test/build/test-${op}-op /data/local/tmp/ + adb -s ${device} shell "cd /data/local/tmp/; LD_LIBRARY_PATH=. ./test-${op}-op" + echo "${BLUE}run test ${op} pass${NONE}" + done + done +} + +AddTest batchnorm +AddTest cast +AddTest conv +AddTest dequantize +#AddTest elementwiseadd +AddTest log +AddTest logical-and +AddTest logical-not +AddTest logical-or +AddTest logical-xor +AddTest pool +AddTest quantize +AddTest relu +AddTest relu6 +AddTest sequence-expand +AddTest sequence-pool +AddTest sequence-softmax +AddTest sigmoid +AddTest softmax +AddTest tanh +AddTest topk diff --git a/tools/docker_build_fpga.sh b/tools/docker_build_fpga.sh index 0927c328dd41b87f77adf19d514703e7bcafbce8..31a28b1532909079b70c1bb1ea63cede8d2c1668 100644 --- a/tools/docker_build_fpga.sh +++ b/tools/docker_build_fpga.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + apt-get update apt-get install -y gcc g++ cmake