From 335e216f6f2572c82f2df7b9cc23ce5fdaf08f98 Mon Sep 17 00:00:00 2001
From: qnqinan
Date: Fri, 20 Dec 2019 14:25:12 +0800
Subject: [PATCH] update some FPGA v2 files after rerun clang-format5.0, test=mobile

---
 mobile/src/fpga/V2/bias_scale.cpp             |  21 +-
 mobile/src/fpga/V2/image.cpp                  |   9 +-
 mobile/src/fpga/V2/pe.cpp                     | 219 +++++++++---------
 .../kernel/fpga/V2/elementwise_add_kernel.cpp |  14 +-
 .../fpga/V2/elementwise_add_relu_kernel.cpp   |  14 +-
 .../kernel/fpga/V2/proposal_kernel.cpp        |  22 +-
 .../kernel/fpga/V2/psroi_pool_kernel.cpp      |  11 +-
 .../kernel/fpga/V2/reshape2_kernel.cpp        |   5 +-
 .../operators/kernel/fpga/V2/slice_kernel.cpp |  33 ++-
 mobile/test/executor_for_test.h               |   2 +-
 mobile/test/executor_for_test_opencl.h        |   8 +-
 11 files changed, 182 insertions(+), 176 deletions(-)
 mode change 100755 => 100644 mobile/src/fpga/V2/image.cpp
 mode change 100755 => 100644 mobile/src/fpga/V2/pe.cpp
 mode change 100755 => 100644 mobile/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
 mode change 100755 => 100644 mobile/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
 mode change 100755 => 100644 mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
 mode change 100755 => 100644 mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp

diff --git a/mobile/src/fpga/V2/bias_scale.cpp b/mobile/src/fpga/V2/bias_scale.cpp
index e04604c587..44722ef59a 100644
--- a/mobile/src/fpga/V2/bias_scale.cpp
+++ b/mobile/src/fpga/V2/bias_scale.cpp
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "fpga/V2/bias_scale.h"
-#include
 #include
+#include
 #include "fpga/common/fpga_common.h"
 
 namespace paddle_mobile {
@@ -56,15 +56,16 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) {
   *data_in = ptr_aligned;
 }
 
-void fixed_scale_bias_new(void*data_in, int data_len) {
-  int* data_tmp = static_cast<int *>(data_in);
-  for (int idx = 0; idx < data_len/2; ++idx) {
-    float tmp = (static_cast<float *>(data_in))[idx];
-    data_tmp[idx] = static_cast<int>(round(tmp*pow(2.0, 23.0)));
-    tmp = (static_cast<float *>(data_in))[idx+data_len/2];
-    data_tmp[idx+data_len/2] = static_cast<int>(round(tmp*pow(2.0, 30.0)));
-  }
-  return;
+void fixed_scale_bias_new(void *data_in, int data_len) {
+  int *data_tmp = static_cast<int *>(data_in);
+  for (int idx = 0; idx < data_len / 2; ++idx) {
+    float tmp = (static_cast<float *>(data_in))[idx];
+    data_tmp[idx] = static_cast<int>(round(tmp * pow(2.0, 23.0)));
+    tmp = (static_cast<float *>(data_in))[idx + data_len / 2];
+    data_tmp[idx + data_len / 2] =
+        static_cast<int>(round(tmp * pow(2.0, 30.0)));
+  }
+  return;
 }
 
 void interleave(float **data_in, int num_after_alignment) {
diff --git a/mobile/src/fpga/V2/image.cpp b/mobile/src/fpga/V2/image.cpp
old mode 100755
new mode 100644
index 917491c371..eda7837bd0
--- a/mobile/src/fpga/V2/image.cpp
+++ b/mobile/src/fpga/V2/image.cpp
@@ -94,11 +94,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
     for (i = 0; i < image_num; i++) {
       align_each_in_area_cw =
          align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT);
-      memcpy(
-          (int8_t *)image_out + tmp_channel +  // NOLINT
-              k * align_each_out_area_cw_differ,
-          images_in[i] + j * channel_num[i] + k * align_each_in_area_cw,
-          channel_num[i] * sizeof(int8_t));
+      memcpy((int8_t *)image_out + tmp_channel +  // NOLINT
+                 k * align_each_out_area_cw_differ,
+             images_in[i] + j * channel_num[i] + k * align_each_in_area_cw,
+             channel_num[i] * sizeof(int8_t));
 
       tmp_channel += channel_num[i];
     }
diff --git a/mobile/src/fpga/V2/pe.cpp b/mobile/src/fpga/V2/pe.cpp
old
mode 100755 new mode 100644 index 1443888c3f..585ab6706e --- a/mobile/src/fpga/V2/pe.cpp +++ b/mobile/src/fpga/V2/pe.cpp @@ -257,8 +257,8 @@ int ComputeBasicConv(const struct ConvArgs &args) { pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; } - // reg_writeq(reg_ActivationArgs, - // REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + // reg_writeq(reg_ActivationArgs, + // REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion reg_writeq(output_scale, REG_SCALE_PARAMETER); // new @@ -274,10 +274,10 @@ int ComputeBasicConv(const struct ConvArgs &args) { args.driver.filter_pad_width_mul_channel, REG_CONV_REG1); - reg_writeq((args.driver.stride_h << 50) | (args.driver.skip_window << 30) | - (args.driver.filter_row << 10) | - (args.driver.filter_height << 5) | args.driver.filter_width, - REG_CONV_REG2); + reg_writeq((args.driver.stride_h << 50) | (args.driver.skip_window << 30) | + (args.driver.filter_row << 10) | + (args.driver.filter_height << 5) | args.driver.filter_width, + REG_CONV_REG2); reg_writeq((args.driver.filter_num << 42) | (args.driver.filter_align << 26) | (args.driver.prog_full_cnt << 16) | @@ -369,74 +369,77 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { uint64_t cmd = 0; uint64_t image_physical_address = 0; uint64_t output_physical_address = 0; -uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); + uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); image_physical_address = vaddr_to_paddr(args.image.address); output_physical_address = vaddr_to_paddr(args.output.address); uint64_t C_paral_64 = align_to_x((uint64_t)args.image.channels, 64); uint64_t C_align_32 = align_to_x((uint64_t)args.image.channels, 32); uint64_t output_height = (uint64_t)( (args.image.height + args.image.pad_height * 2 - args.kernel.height) / - args.kernel.stride_h + 1); + args.kernel.stride_h + + 1); uint64_t output_width = (uint64_t)( (args.image.width + args.image.pad_width * 2 - args.kernel.width) / - args.kernel.stride_w + 1); + args.kernel.stride_w + + 1); uint64_t image_amount_per_row = align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels, IMAGE_ALIGNMENT); - uint64_t image_one_pad_per_row = (uint64_t)args.image.width * - (uint64_t)args.image.channels +(uint64_t)args.image.pad_width * - (uint64_t)args.image.channels; + uint64_t image_one_pad_per_row = + (uint64_t)args.image.width * (uint64_t)args.image.channels + + (uint64_t)args.image.pad_width * (uint64_t)args.image.channels; - uint64_t result_amount_align_32 = align_to_x((uint64_t)output_width * - (uint64_t)args.image.channels, 32); + uint64_t result_amount_align_32 = + align_to_x((uint64_t)output_width * (uint64_t)args.image.channels, 32); uint64_t result_addr_row = - (result_amount_align_32 << 32) | output_physical_address; + (result_amount_align_32 << 32) | output_physical_address; uint64_t row_padding_down = - (uint64_t)args.image.height + (uint64_t)args.image.pad_height; - uint64_t kernel_width_sub1 = - (uint64_t)args.kernel.width - 1; + (uint64_t)args.image.height + (uint64_t)args.image.pad_height; + uint64_t kernel_width_sub1 = (uint64_t)args.kernel.width - 1; uint64_t kernel_padding_step = row_padding_down | - ((uint64_t)args.image.pad_height << 16) | - ((uint64_t)args.kernel.stride_h << 24) | - ((uint64_t)kernel_width_sub1<<32) | - ((uint64_t)args.kernel.height << 40) | - ((uint64_t)(args.kernel.height-1) << 48); - uint64_t image_calcu_height = (uint64_t)args.kernel.height + - (output_height - 1) * (uint64_t)args.kernel.stride_h; + ((uint64_t)args.image.pad_height << 16) 
| + ((uint64_t)args.kernel.stride_h << 24) | + ((uint64_t)kernel_width_sub1 << 32) | + ((uint64_t)args.kernel.height << 40) | + ((uint64_t)(args.kernel.height - 1) << 48); + uint64_t image_calcu_height = + (uint64_t)args.kernel.height + + (output_height - 1) * (uint64_t)args.kernel.stride_h; uint64_t result_size_calcu_height = (output_height - 1) | - ((output_width - 1) << 16) | (image_calcu_height << 32); - uint64_t col_padding_down = ((uint64_t)args.image.width + - (uint64_t)args.image.pad_width) * (uint64_t)args.image.channels; + ((output_width - 1) << 16) | + (image_calcu_height << 32); + uint64_t col_padding_down = + ((uint64_t)args.image.width + (uint64_t)args.image.pad_width) * + (uint64_t)args.image.channels; uint64_t image_row_col_padding_down = - image_amount_per_row | (col_padding_down << 32); + image_amount_per_row | (col_padding_down << 32); uint64_t image_rowXpadding_h = - image_amount_per_row * (uint64_t)args.image.pad_height; + image_amount_per_row * (uint64_t)args.image.pad_height; uint64_t image_rowXstep_h = - image_amount_per_row * (uint64_t)args.kernel.stride_h; + image_amount_per_row * (uint64_t)args.kernel.stride_h; uint64_t image_rowXpad_h_rowXstep_h = - image_rowXpadding_h | (image_rowXstep_h << 32); + image_rowXpadding_h | (image_rowXstep_h << 32); uint64_t channelXpad_w = - (uint64_t)args.image.channels * (uint64_t)args.image.pad_width; + (uint64_t)args.image.channels * (uint64_t)args.image.pad_width; uint64_t channelXstep_w = - (uint64_t)args.image.channels * (uint64_t)args.kernel.stride_w; + (uint64_t)args.image.channels * (uint64_t)args.kernel.stride_w; uint64_t channelXpad_w_channelXstep_w = - channelXpad_w | (channelXstep_w << 32); - uint64_t filter_row_align = - C_align_32 * (uint64_t)args.kernel.width; - uint64_t sub_filter_amount_align = C_align_32 * - (uint64_t)args.kernel.width * (uint64_t)args.kernel.height; + channelXpad_w | (channelXstep_w << 32); + uint64_t filter_row_align = C_align_32 * (uint64_t)args.kernel.width; + uint64_t sub_filter_amount_align = + C_align_32 * (uint64_t)args.kernel.width * (uint64_t)args.kernel.height; uint64_t mult_factor = 0; float average_reciprocal = args.kernel_reciprocal; - uint32_t* kernel_reciprocal; - kernel_reciprocal =(reinterpret_cast(&average_reciprocal)); + uint32_t *kernel_reciprocal; + kernel_reciprocal = (reinterpret_cast(&average_reciprocal)); if (args.mode == 1) - mult_factor = (uint64_t)(*kernel_reciprocal) | - ((uint64_t)1 << 32) | ((uint64_t)1 << 40); + mult_factor = (uint64_t)(*kernel_reciprocal) | ((uint64_t)1 << 32) | + ((uint64_t)1 << 40); else mult_factor = - (uint64_t)0x3f800000 | ((uint64_t)1 << 32) | ((uint64_t)1 << 40); + (uint64_t)0x3f800000 | ((uint64_t)1 << 32) | ((uint64_t)1 << 40); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status) { ret = -EIO; @@ -501,7 +504,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #endif #ifdef PADDLE_MOBILE_ZU5 int ret = 0; -uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); + uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) { @@ -511,7 +514,6 @@ uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); return ret; } - uint64_t image0_physical_address = 0; uint64_t image1_physical_address = 0; uint64_t image_physical_address = 0; @@ -519,26 +521,28 @@ uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); image0_physical_address = vaddr_to_paddr(args.image0.address); 
image1_physical_address = vaddr_to_paddr(args.image1.address); image_physical_address = - image0_physical_address | (image1_physical_address << 32); + image0_physical_address | (image1_physical_address << 32); output_physical_address = vaddr_to_paddr(args.output.address); uint64_t image_amount_per_row = - align_to_x((uint64_t)args.image0.width * - (uint64_t)args.image0.channels, IMAGE_ALIGNMENT); + align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels, + IMAGE_ALIGNMENT); uint64_t result_addr_row = - output_physical_address | (image_amount_per_row << 32); + output_physical_address | (image_amount_per_row << 32); uint64_t kernel_padding_step = 0; kernel_padding_step = ((uint64_t)args.image0.height * 2) | - ((uint64_t)2 << 24) | ((uint64_t)2 << 40) | ((uint64_t)1 << 48); - uint64_t result_size_calcu_height = ((uint64_t)args.image0.height - 1) | - ((image_amount_per_row / 32 - 1) << 16) | - (((uint64_t)args.image0.height * 2) << 32); - uint64_t image_row_col_padding_down = image_amount_per_row | - (image_amount_per_row << 32); - float quantParam = - ((args.image0.scale_address)[0]) / ((args.output.scale_address)[0]); - uint32_t* ew_scale = reinterpret_cast(&quantParam); - uint64_t ew_scale_mult_factor = (*ew_scale) | - ((uint64_t)args.const0 << 32) | ((uint64_t)args.const1 << 40); + ((uint64_t)2 << 24) | ((uint64_t)2 << 40) | + ((uint64_t)1 << 48); + uint64_t result_size_calcu_height = + ((uint64_t)args.image0.height - 1) | + ((image_amount_per_row / 32 - 1) << 16) | + (((uint64_t)args.image0.height * 2) << 32); + uint64_t image_row_col_padding_down = + image_amount_per_row | (image_amount_per_row << 32); + float quantParam = + ((args.image0.scale_address)[0]) / ((args.output.scale_address)[0]); + uint32_t *ew_scale = reinterpret_cast(&quantParam); + uint64_t ew_scale_mult_factor = (*ew_scale) | ((uint64_t)args.const0 << 32) | + ((uint64_t)args.const1 << 40); reg_writeq(0ul, REG_SCALE_PARAMETER); reg_writeq(image_physical_address, 0x808); reg_writeq(result_addr_row, 0x810); @@ -546,7 +550,7 @@ uint64_t bypass_interrupt = reg_readq(REG_INTERRUPT); reg_writeq(result_size_calcu_height, 0x820); reg_writeq(32, 0x828); reg_writeq(image_row_col_padding_down, 0x830); - reg_writeq(((image_amount_per_row*2) << 32), 0x838); + reg_writeq(((image_amount_per_row * 2) << 32), 0x838); reg_writeq(ew_scale_mult_factor, 0x840); // dw donot care reg_writeq(((uint64_t)32 << 32), 0x848); reg_writeq(0, 0x858); @@ -924,7 +928,7 @@ int ComputeDWConv(const struct DWconvArgs &args) { << " pad_height:" << args.image.pad_height << " pad_width:" << args.image.pad_width; DLOG << " filter_address:" << args.filter_address; - //<< " bias_address:" << args.bias_address; + //<< " bias_address:" << args.bias_address; DLOG << " kernel_height:" << args.kernel.height << " kernel_width:" << args.kernel.width << " stride_h:" << args.kernel.stride_h @@ -950,67 +954,71 @@ int ComputeDWConv(const struct DWconvArgs &args) { bias_physical_address = vaddr_to_paddr(args.bias_address); uint64_t C_align_64 = align_to_x((uint64_t)args.image.channels, 64); uint64_t C_align_32 = align_to_x((uint64_t)args.image.channels, 32); - uint64_t output_height = (uint64_t) - ((args.image.height + args.image.pad_height * 2 - - args.kernel.height) / args.kernel.stride_h +1); - uint64_t output_width = (uint64_t) - (((args.image.width + args.image.pad_width * 2 - args.kernel.width) / - args.kernel.stride_w + 1) * args.sub_conv_num); + uint64_t output_height = (uint64_t)( + (args.image.height + args.image.pad_height * 2 - args.kernel.height) / 
+ args.kernel.stride_h + + 1); + uint64_t output_width = (uint64_t)( + ((args.image.width + args.image.pad_width * 2 - args.kernel.width) / + args.kernel.stride_w + + 1) * + args.sub_conv_num); uint64_t image_amount_per_row = - align_to_x((uint64_t)args.image.width * - (uint64_t)args.image.channels, IMAGE_ALIGNMENT); + align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels, + IMAGE_ALIGNMENT); uint64_t image_one_pad_per_row = - (uint64_t)args.image.width * (uint64_t)args.image.channels + - (uint64_t)args.image.pad_width * (uint64_t)args.image.channels; + (uint64_t)args.image.width * (uint64_t)args.image.channels + + (uint64_t)args.image.pad_width * (uint64_t)args.image.channels; - uint64_t result_amount_align_32 = align_to_x( - (uint64_t)output_width * (uint64_t)args.image.channels, 32); + uint64_t result_amount_align_32 = + align_to_x((uint64_t)output_width * (uint64_t)args.image.channels, 32); uint64_t result_addr_row = - (result_amount_align_32 << 32) | output_physical_address; + (result_amount_align_32 << 32) | output_physical_address; uint64_t row_padding_down = - (uint64_t)args.image.height + (uint64_t)args.image.pad_height; + (uint64_t)args.image.height + (uint64_t)args.image.pad_height; uint64_t kernel_width_sub1 = (uint64_t)args.kernel.width - 1; uint64_t kernel_padding_step = row_padding_down | - ((uint64_t)args.image.pad_height << 16) | - ((uint64_t)args.kernel.stride_h << 24) | - ((uint64_t)kernel_width_sub1<<32) | - ((uint64_t)args.kernel.height << 40) | - ((uint64_t)(args.kernel.height-1) << 48); - uint64_t image_calcu_height = (uint64_t)args.kernel.height + - (output_height - 1) * (uint64_t)args.kernel.stride_h; + ((uint64_t)args.image.pad_height << 16) | + ((uint64_t)args.kernel.stride_h << 24) | + ((uint64_t)kernel_width_sub1 << 32) | + ((uint64_t)args.kernel.height << 40) | + ((uint64_t)(args.kernel.height - 1) << 48); + uint64_t image_calcu_height = + (uint64_t)args.kernel.height + + (output_height - 1) * (uint64_t)args.kernel.stride_h; uint64_t result_size_calcu_height = (output_height - 1) | - ((output_width - 1) << 16) | (image_calcu_height << 32); - uint64_t col_padding_down = ((uint64_t)args.image.width + - (uint64_t)args.image.pad_width) * (uint64_t)args.image.channels; + ((output_width - 1) << 16) | + (image_calcu_height << 32); + uint64_t col_padding_down = + ((uint64_t)args.image.width + (uint64_t)args.image.pad_width) * + (uint64_t)args.image.channels; uint64_t image_row_col_padding_down = - image_amount_per_row | (col_padding_down << 32); + image_amount_per_row | (col_padding_down << 32); uint64_t image_rowXpadding_h = - image_amount_per_row * (uint64_t)args.image.pad_height; + image_amount_per_row * (uint64_t)args.image.pad_height; uint64_t image_rowXstep_h = - image_amount_per_row * (uint64_t)args.kernel.stride_h; + image_amount_per_row * (uint64_t)args.kernel.stride_h; uint64_t image_rowXpad_h_rowXstep_h = - image_rowXpadding_h | (image_rowXstep_h << 32); + image_rowXpadding_h | (image_rowXstep_h << 32); uint64_t channelXpad_w = - (uint64_t)args.image.channels * (uint64_t)args.image.pad_width; + (uint64_t)args.image.channels * (uint64_t)args.image.pad_width; uint64_t channelXstep_w = - (uint64_t)args.image.channels * (uint64_t)args.kernel.stride_w; + (uint64_t)args.image.channels * (uint64_t)args.kernel.stride_w; uint64_t channelXpad_w_channelXstep_w = - channelXpad_w | (channelXstep_w << 32); + channelXpad_w | (channelXstep_w << 32); - uint64_t filter_row_align = - C_align_64 * (uint64_t)args.kernel.width; - uint64_t sub_filter_amount_align 
= C_align_64 * - (uint64_t)args.kernel.width * - (uint64_t)args.kernel.height; + uint64_t filter_row_align = C_align_64 * (uint64_t)args.kernel.width; + uint64_t sub_filter_amount_align = + C_align_64 * (uint64_t)args.kernel.width * (uint64_t)args.kernel.height; uint64_t filter_amount_align = - sub_filter_amount_align * (uint64_t)args.sub_conv_num; + sub_filter_amount_align * (uint64_t)args.sub_conv_num; uint64_t filter_param = filter_row_align | (filter_amount_align << 16) | - (sub_filter_amount_align << 32) | - (((uint64_t)args.sub_conv_num -1) << 48); + (sub_filter_amount_align << 32) | + (((uint64_t)args.sub_conv_num - 1) << 48); uint64_t channel_parameter = - (uint64_t)args.image.channels | (C_align_64 << 16); + (uint64_t)args.image.channels | (C_align_64 << 16); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_POOLING]->status) { ret = -EIO; @@ -1030,8 +1038,9 @@ int ComputeDWConv(const struct DWconvArgs &args) { reg_writeq(channelXpad_w_channelXstep_w, 0x848); reg_writeq(filter_physical_address, 0x850); reg_writeq(filter_param, 0x858); - reg_writeq(((bias_physical_address+C_align_64*4) | - (bias_physical_address << 32)), 0x860); + reg_writeq(((bias_physical_address + C_align_64 * 4) | + (bias_physical_address << 32)), + 0x860); cmd = (uint64_t)1 | (((uint64_t)args.relu_enabled) << 8); reg_writeq(cmd, 0x800); diff --git a/mobile/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp old mode 100755 new mode 100644 index ab87d71292..54ae3b6712 --- a/mobile/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifdef ELEMENTWISEADD_OP -#include #include "operators/kernel/elementwise_add_kernel.h" +#include namespace paddle_mobile { namespace operators { @@ -62,14 +62,14 @@ void ComputeCPUEWAdd(fpga::EWAddArgs ewaddArgs) { int inputh = ewaddArgs.image0.height; int inputw = ewaddArgs.image0.width; float inScale0 = - (reinterpret_cast(ewaddArgs.image0.scale_address))[0]; + (reinterpret_cast(ewaddArgs.image0.scale_address))[0]; float inScale1 = - (reinterpret_cast(ewaddArgs.image1.scale_address))[0]; + (reinterpret_cast(ewaddArgs.image1.scale_address))[0]; float outScale = - (reinterpret_cast(ewaddArgs.output.scale_address))[0]; - int8_t* inPtr0 = reinterpret_cast(ewaddArgs.image0.address); - int8_t* inPtr1 = reinterpret_cast(ewaddArgs.image1.address); - int8_t* outPtr = reinterpret_cast(ewaddArgs.output.address); + (reinterpret_cast(ewaddArgs.output.scale_address))[0]; + int8_t *inPtr0 = reinterpret_cast(ewaddArgs.image0.address); + int8_t *inPtr1 = reinterpret_cast(ewaddArgs.image1.address); + int8_t *outPtr = reinterpret_cast(ewaddArgs.output.address); int datasize = inputc * inputh * inputw; float const0 = inScale0 / outScale; float const1 = inScale1 / outScale; diff --git a/mobile/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp old mode 100755 new mode 100644 index a55a78e566..c406a22d56 --- a/mobile/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #ifdef FUSION_ELEMENTWISEADDRELU_OP -#include #include "operators/kernel/elementwise_add_relu_kernel.h" +#include namespace paddle_mobile { namespace operators { @@ -63,14 +63,14 @@ void ComputeCPUEWAddRelu(fpga::EWAddArgs ewaddArgs) { int inputh = ewaddArgs.image0.height; int inputw = ewaddArgs.image0.width; float inScale0 = - (reinterpret_cast(ewaddArgs.image0.scale_address))[0]; + (reinterpret_cast(ewaddArgs.image0.scale_address))[0]; float inScale1 = - (reinterpret_cast(ewaddArgs.image1.scale_address))[0]; + (reinterpret_cast(ewaddArgs.image1.scale_address))[0]; float outScale = - (reinterpret_cast(ewaddArgs.output.scale_address))[0]; - int8_t* inPtr0 = reinterpret_cast(ewaddArgs.image0.address); - int8_t* inPtr1 = reinterpret_cast(ewaddArgs.image1.address); - int8_t* outPtr = reinterpret_cast(ewaddArgs.output.address); + (reinterpret_cast(ewaddArgs.output.scale_address))[0]; + int8_t *inPtr0 = reinterpret_cast(ewaddArgs.image0.address); + int8_t *inPtr1 = reinterpret_cast(ewaddArgs.image1.address); + int8_t *outPtr = reinterpret_cast(ewaddArgs.output.address); int datasize = inputc * inputh * inputw; float const0 = inScale0 / outScale; float const1 = inScale1 / outScale; diff --git a/mobile/src/operators/kernel/fpga/V2/proposal_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/proposal_kernel.cpp index 50179b9cd5..c2f8b55c1e 100644 --- a/mobile/src/operators/kernel/fpga/V2/proposal_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/proposal_kernel.cpp @@ -331,7 +331,7 @@ std::pair ProposalForOneImage( keep_nms.Resize({post_nms_top_n}); } - proposals.mutable_data({keep_nms.numel(), 4}); // original + proposals.mutable_data({keep_nms.numel(), 4}); // original scores_sel.mutable_data({keep_nms.numel(), 1}); // original CPUGather(bbox_sel, keep_nms, &proposals); @@ -371,8 +371,8 @@ void ProposalKernel::Compute(const ProposalParam ¶m) { for (int h = 0; h < score_height; h++) { for (int w = 0; w < score_width; w++) { for (int c = 0; c < score_channels; ++c) { - int dstidx = h*unalignedCW + w*score_channels + c; - int srcidx = h*alignedCW + w*score_channels + c; + int dstidx = h * unalignedCW + w * score_channels + c; + int srcidx = h * alignedCW + w * score_channels + c; score_tensor.data()[dstidx] = input_score_data[srcidx]; } } @@ -388,11 +388,11 @@ void ProposalKernel::Compute(const ProposalParam ¶m) { for (int h = 0; h < bbox_height; h++) { for (int w = 0; w < bbox_width; w++) { for (int c = 0; c < bbox_channels; ++c) { - int dstidx = h*unalignedCW + w*bbox_channels + c; - int srcidx = h*alignedCW + w*bbox_channels + c; + int dstidx = h * unalignedCW + w * bbox_channels + c; + int srcidx = h * alignedCW + w * bbox_channels + c; bbox_tensor->data()[dstidx] = - (static_cast(input_bbox_data[srcidx]))/127.0* - input_bbox->scale[0]; + (static_cast(input_bbox_data[srcidx])) / 127.0 * + input_bbox->scale[0]; } } } @@ -412,14 +412,14 @@ void ProposalKernel::Compute(const ProposalParam ¶m) { float min_size = param.min_size_; float eta = param.eta_; - rpn_rois->mutable_data({bbox_tensor->numel()/4, 4}); - rpn_roi_probs->mutable_data({input_score->numel()/4, 1}); + rpn_rois->mutable_data({bbox_tensor->numel() / 4, 4}); + rpn_roi_probs->mutable_data({input_score->numel() / 4, 1}); framework::LoD lod; lod.resize(1); auto &lod0 = lod[0]; lod0.push_back(0); - anchors.Resize({anchors.numel()/4, 4}); - variances.Resize({variances.numel()/4, 4}); + anchors.Resize({anchors.numel() / 4, 4}); + 
variances.Resize({variances.numel() / 4, 4}); int64_t num_proposals = 0; for (int64_t i = 0; i < score_n; ++i) { diff --git a/mobile/src/operators/kernel/fpga/V2/psroi_pool_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/psroi_pool_kernel.cpp index 87948f824e..00c0b5d631 100644 --- a/mobile/src/operators/kernel/fpga/V2/psroi_pool_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/psroi_pool_kernel.cpp @@ -143,7 +143,6 @@ void PSRoiPoolKernel::Compute(const PSRoiPoolParam& param) { "the channels of input X should equal the product of " "output_channels x pooled_height x pooled_width"); - auto output_data = out->mutable_data(); auto input_rois = rois->data(); @@ -173,11 +172,11 @@ void PSRoiPoolKernel::Compute(const PSRoiPoolParam& param) { for (int ph = 0; ph < pooled_height; ph++) { for (int pw = 0; pw < pooled_width; pw++) { - PSROIPoolingForward( - input_data, height, width, input_channels, offset_output_data, - pooled_height, pooled_width, output_channels, input_rois, - bin_size_h, bin_size_w, roi_start_h, roi_start_w, pw, ph, - scale, roi_batch_ind); + PSROIPoolingForward(input_data, height, width, input_channels, + offset_output_data, pooled_height, + pooled_width, output_channels, input_rois, + bin_size_h, bin_size_w, roi_start_h, + roi_start_w, pw, ph, scale, roi_batch_ind); } } } diff --git a/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp old mode 100755 new mode 100644 index c7cd6575e4..5b651ad6e6 --- a/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/reshape2_kernel.cpp @@ -118,11 +118,10 @@ void Reshape2Kernel::Compute(const Reshape2Param ¶m) { auto inputdimsize = input->dims().size(); auto outputdimsize = output->dims().size(); int smallersize = - inputdimsize > outputdimsize ? outputdimsize : inputdimsize; + inputdimsize > outputdimsize ? 
outputdimsize : inputdimsize; int i = 0; for (i = 0; i < smallersize; i++) { - if ((input->dims())[i] != (output->dims())[i]) - break; + if ((input->dims())[i] != (output->dims())[i]) break; } if (i == smallersize) { reshapeNeedFlg = 0; diff --git a/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp b/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp old mode 100755 new mode 100644 index d32dddb307..e40242d5c2 --- a/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp +++ b/mobile/src/operators/kernel/fpga/V2/slice_kernel.cpp @@ -57,31 +57,30 @@ void SliceKernel::Compute(const SliceParam& param) { int len = end - start; size_t size = len * sizeof(int8_t); DLOG << input->fpga_data_num; - fpga::fpga_invalidate(input_ptr, input->fpga_data_num*sizeof(int8_t)); + fpga::fpga_invalidate(input_ptr, input->fpga_data_num * sizeof(int8_t)); DLOG << output->fpga_data_num; - fpga::fpga_invalidate(output_ptr, output->fpga_data_num*sizeof(int8_t)); + fpga::fpga_invalidate(output_ptr, output->fpga_data_num * sizeof(int8_t)); int unalignedWC = len * W; int alignedWC = fpga::align_to_x(W * len, IMAGE_ALIGNMENT); if (unalignedWC != alignedWC) { - auto tmpOutput = reinterpret_cast - (fpga::fpga_malloc(len*HW * sizeof(int8_t))); - for (int i = 0; i < HW; i++) { - memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size); + auto tmpOutput = + reinterpret_cast(fpga::fpga_malloc(len * HW * sizeof(int8_t))); + for (int i = 0; i < HW; i++) { + memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size); + } + for (int i = 0; i < H; i++) { + for (int j = 0; j < unalignedWC; j++) { + *(output_ptr + alignedWC * i + j) = *(tmpOutput + unalignedWC * i + j); } - for (int i = 0; i < H; i++) { - for (int j = 0; j < unalignedWC; j++) { - *(output_ptr + alignedWC * i + j) = - *(tmpOutput + unalignedWC * i + j); - } - } - fpga::fpga_free(tmpOutput); + } + fpga::fpga_free(tmpOutput); } else { - for (int i = 0; i < HW; i++) { - memcpy(output_ptr + len * i, input_ptr + i * channel + start, size); - } + for (int i = 0; i < HW; i++) { + memcpy(output_ptr + len * i, input_ptr + i * channel + start, size); + } } - fpga::fpga_flush(output_ptr, output->fpga_data_num*sizeof(int8_t)); + fpga::fpga_flush(output_ptr, output->fpga_data_num * sizeof(int8_t)); } } // namespace operators } // namespace paddle_mobile diff --git a/mobile/test/executor_for_test.h b/mobile/test/executor_for_test.h index 6f1680c513..0a67eea5d5 100644 --- a/mobile/test/executor_for_test.h +++ b/mobile/test/executor_for_test.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once +#include #include #include -#include #include "common/log.h" #include "framework/executor.h" #include "framework/op_registry.h" diff --git a/mobile/test/executor_for_test_opencl.h b/mobile/test/executor_for_test_opencl.h index bc24541f13..3a8af87592 100644 --- a/mobile/test/executor_for_test_opencl.h +++ b/mobile/test/executor_for_test_opencl.h @@ -15,10 +15,11 @@ limitations under the License. */ #pragma once #ifdef PADDLE_MOBILE_CL +#include #include #include -#include +#include "./test_helper.h" #include "common/log.h" #include "framework/cl/cl_helper.h" #include "framework/cl/cl_tensor.h" @@ -26,18 +27,17 @@ limitations under the License. 
*/ #include "framework/op_registry.h" #include "operators/feed_op.h" #include "operators/fetch_op.h" -#include "./test_helper.h" +using paddle_mobile::framework::AttributeMap; using paddle_mobile::framework::BlockDesc; using paddle_mobile::framework::DDim; using paddle_mobile::framework::Executor; using paddle_mobile::framework::LoDTensor; using paddle_mobile::framework::OpDesc; +using paddle_mobile::framework::OperatorBase; using paddle_mobile::framework::Program; using paddle_mobile::framework::Tensor; using paddle_mobile::framework::Variable; -using paddle_mobile::framework::OperatorBase; -using paddle_mobile::framework::AttributeMap; using std::string; using std::vector; namespace paddle_mobile { -- GitLab