diff --git a/python/tools/imagetools/imagetools.py b/python/tools/imagetools/imagetools.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a4432858007d6858f2728815670cfd1ed5ec786
--- /dev/null
+++ b/python/tools/imagetools/imagetools.py
@@ -0,0 +1,61 @@
+# coding=utf-8
+import cv2
+from array import array
+
+
+def resize_take_rgbs(path, shape_h_w):
+    print '--------------resize_take_rgbs-----------------begin'
+    image = cv2.imread(path)
+    # print image.shape
+    cv2.imshow("before", image)
+
+    print_rgb(image[0, 0])
+    # the source image may not match the target shape; just check it
+    # image.resize(shape_h_w)
+
+    # note: cv2.resize expects (width, height); both are equal here
+    image = cv2.resize(image, (shape_h_w[0], shape_h_w[1]))
+
+    cv2.imshow("after", image)
+    print image.shape
+    height = shape_h_w[0]
+    width = shape_h_w[1]
+
+    rs_ = []
+    gs_ = []
+    bs_ = []
+    for h in range(0, height):
+        for w in range(0, width):
+            bs_.append(image[h, w, 0])
+            gs_.append(image[h, w, 1])
+            rs_.append(image[h, w, 2])
+
+    # print image[2, 2, 0] / 255.
+    print len(bs_)
+    print len(gs_)
+    print len(rs_)
+    print '--------------resize_take_rgbs-----------------end'
+    return bs_, gs_, rs_
+
+
+def print_rgb((b, g, r)):
+    print "pixel - R:%d,G:%d,B:%d" % (r, g, b)  # show the pixel value
+    #
+    # image[0, 0] = (100, 150, 200)  # overwrite the pixel at (0, 0)
+    #
+    # (b, g, r) = image[0, 0]  # read the (0, 0) pixel again
+    # print "pixel at (0,0) - R:%d,G:%d,B:%d" % (r, g, b)  # show the updated value
+    #
+    # corner = image[0:100, 0:100]  # read a block of pixels
+    # cv2.imshow("Corner", corner)  # show the block
+    #
+    # image[0:100, 0:100] = (0, 255, 0)  # overwrite the block
+    #
+    # cv2.imshow("Updated", image)  # show the image
+    #
+    # cv2.waitKey(0)  # pause the program
+
+
+def save_to_file(to_file_name, array):
+    to_file = open(to_file_name, "wb")
+    array.tofile(to_file)
+    to_file.close()
diff --git a/python/tools/imagetools/img2nchw.py b/python/tools/imagetools/img2nchw.py
new file mode 100644
index 0000000000000000000000000000000000000000..70ca456a1b1b5d20b92d0aaa51b01abb352c1d54
--- /dev/null
+++ b/python/tools/imagetools/img2nchw.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+from enum import Enum
+
+
+class ChannelType(Enum):
+    RGB = 0
+    BGR = 1
+
+
+def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
+    print '--------------combine_bgrs_nchw-----------------begin'
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+
+    assert len(bs) == len(gs) == len(rs)
+    print len(bs)
+    bgrs_float_array = array('f')
+
+    if channel_type == ChannelType.BGR:
+        print 'bgr'
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+    elif channel_type == ChannelType.RGB:
+        print 'rgb'
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+
+    print len(bgrs_float_array)
+
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2]
+
+    # for i in range(0, 9):
+    #     print 'bs %d' % i
+    #     print bs[i] / 255.
+
+    print bs[416 * 2 + 2] / 255.
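+    # sanity check: with RGB order, zero means and scale 1./255, the flat NCHW
+    # index c*H*W + h*W + w = 416*416*2 + 416*2 + 2 addresses the blue plane at
+    # (h=2, w=2), so the two values printed above should match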
+    print '--------------combine_bgrs_nchw-----------------end'
+
+    return bgrs_float_array
+
+
+# bgrs = tools.resize_take_rgbs('banana.jpeg', (224, 224, 3))
+# array = combine_bgrs_nchw(bgrs, (103.94, 116.78, 123.68), 0.017, ChannelType.BGR)
+# tools.save_to_file('banana_1_3_224_224_nchw_float', array)
+
+# cv2.waitKey(0)
+
+
+bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3))
+array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
+tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array)
diff --git a/python/tools/imagetools/img2nhwc.py b/python/tools/imagetools/img2nhwc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c982fe303ecde08a9de1827ca67024567322d47f
--- /dev/null
+++ b/python/tools/imagetools/img2nhwc.py
@@ -0,0 +1,34 @@
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+
+
+def combine_bgrs_nhwc(bgrs, means_b_g_r, scale):
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+    assert len(bs) == len(gs) == len(rs)
+    # print len(bs)
+    bgrs_float_array = array('f')
+    # NHWC layout: channels are interleaved per pixel (written in r, g, b order)
+    for i in range(0, len(bs)):
+        bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+
+    print len(bgrs_float_array)
+
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[999]
+    return bgrs_float_array
+
+
+bgrs = tools.resize_take_rgbs('newyolo_1.jpg', (416, 416, 3))
+array = combine_bgrs_nhwc(bgrs, (0, 0, 0), 1.0 / 255)
+tools.save_to_file('desktop_1_3_416_416_nhwc_float', array)
+
+cv2.waitKey(0)
diff --git a/python/tools/imagetools/numpy2binary.py b/python/tools/imagetools/numpy2binary.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd4bc6e10074183b8dcee4122860c4140ff54229
--- /dev/null
+++ b/python/tools/imagetools/numpy2binary.py
@@ -0,0 +1,47 @@
+# coding=utf-8
+
+# this script dumps a numpy array into a binary file
+import cv2
+import numpy as np
+import imagetools as tools
+from array import array
+
+#
+# image = cv2.imread(path)
+# print image.shape
+#
+# print_rgb(image[0, 0])
+# # the source image may not match the target shape; just check it
+# image.resize(shape_h_w)
+
+
+data = np.fromfile('datas/img.res')
+print data.size
+print data[0]
+
+# note: data.reshape(1, 3, 416, 416) returns a new array rather than reshaping
+# in place; the data is kept flat so the index checks below keep working
+out_array = array('f')
+print '--------------------'
+print data.size
+print data[0]
+
+print 'if the layout is nhwc --------'
+# rgb rgb rgb rgb rgb
+print data[416 * 3 * 2 + 3 * 2 + 2]
+# print data[2]
+
+print 'if the layout is nchw --------'
+# rgb rgb rgb rgb rgb
+print data[416 * 416 * 2 + 416 * 2 + 2]
+# print data[2]
+
+# it is clearly nchw
+
+for i in range(0, data.size):
+    out_array.append(data[i])
+
+print len(out_array)
+
+print out_array[416 * 416 * 2 + 416 * 2 + 2]
+
+tools.save_to_file('datas/in_put_1_3_416_416_2', out_array)
diff --git a/python/tools/mdl2fluid/model_combine.py b/python/tools/mdl2fluid/model_combine.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae3ca8a786dc0d4032deda35c33f44d3d96e983d
--- /dev/null
+++ b/python/tools/mdl2fluid/model_combine.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+import os
+
+path = "yolo_v2_tofile_source/"  # source directory
+to_file_path = "yolo_v2_tofile_combined/params"
+files = os.listdir(path)  # all file names in the directory
+files.sort(cmp=None, key=str.lower)
+to_file = open(to_file_path, "wb")
+
+for file in files:  # walk the directory
+    if not os.path.isdir(path + file):  # only open regular files, skip sub-directories
+        name = path + file
+        print 'name: ' + name
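+        # append this parameter file's raw bytes to the combined params file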
+        from_file = open(name, "rb")
+        to_file.write(from_file.read())
+        from_file.close()
+
+to_file.close()
diff --git a/python/tools/mdl2fluid/swicher.py b/python/tools/mdl2fluid/swicher.py
index 0cf39959ff59bee4495ca92f8276dcc49e094686..bfe0360fd5b32f5e6fa61f6f05a0a384fb3a1e9b 100644
--- a/python/tools/mdl2fluid/swicher.py
+++ b/python/tools/mdl2fluid/swicher.py
@@ -66,7 +66,7 @@ class Swichter:
     def read_head(self, head_file):
         from_file = open(head_file, "rb")
-        read = from_file.read(20)
+        read = from_file.read(24)
         # print read
         from_file.close()
         # print read
@@ -84,9 +84,32 @@ class Swichter:
         to_file.close()
         pass
 
+    def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name,
+                              padding):
+        print 'padding = %d' % padding
+        from_file = open(from_file_name, "rb")
+        # print len(from_file.read())
+        # skip the first `padding` bytes of the source file
+        from_file.seek(padding, 0)
+
+        read = from_file.read()
+        print len(read)
+
+        to_file = open(to_file_name, "wb")
+        # tmp_file = open(tmp_file_name, "wb")
+
+        head = self.read_head(
+            '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
+        to_file.write(head)
+        to_file.write(read)
+        from_file.close()
+        to_file.close()
+        pass
+
+# Swichter().nhwc2nchw_one_slice_add_head(
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp',
+#     32,
+#     3, 3, 3)
 
+# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
-# Swichter().nhwc2nchw_one_slice(
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv5_6_dw_0.bin',
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/conv5_6_dw_0', 1,
-#     512, 3, 3)
-Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
+# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp
index 9d33c742e3a4bc76d1f2766a8b5476579ace2789..10787b915594a12a826a087e5453b2c2e8c03f9a 100644
--- a/src/fpga/api.cpp
+++ b/src/fpga/api.cpp
@@ -29,9 +29,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";
 
-#ifdef PADDLE_MOBILE_OS_LINUX
 static std::map<void *, size_t> memory_map;
-#endif
 
 static inline int do_ioctl(int req, const void *arg) {
 #ifdef PADDLE_MOBILE_OS_LINUX
@@ -53,32 +51,38 @@ int open_device() {
 // memory management;
 void *fpga_malloc(size_t size) {
   static uint64_t counter = 0;
-  counter += size;
-  DLOG << size << " bytes allocated. Total " << counter << " bytes";
+
 #ifdef PADDLE_MOBILE_OS_LINUX
   auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-  memory_map.insert(std::make_pair(ptr, size));
-  return ptr;
 #else
-  return malloc(size);
+  auto ptr = malloc(size);
 #endif
+  counter += size;
+  memory_map.insert(std::make_pair(ptr, size));
+  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. 
Total " + << counter << " bytes"; + return ptr; } void fpga_free(void *ptr) { -#ifdef PADDLE_MOBILE_OS_LINUX static uint64_t counter = 0; size_t size = 0; + auto iter = memory_map.find(ptr); // std::map::iterator if (iter != memory_map.end()) { size = iter->second; - munmap(ptr, size); memory_map.erase(iter); - } - counter += size; - DLOG << size << " bytes freed. Total " << counter << " bytes"; +#ifdef PADDLE_MOBILE_OS_LINUX + munmap(ptr, size); #else - free(ptr); + free(ptr); #endif + counter += size; + DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total " + << counter << " bytes"; + } else { + DLOG << "Invalid pointer"; + } } void fpga_copy(void *dest, const void *src, size_t num) { @@ -211,7 +215,8 @@ int PerformBypass(const struct BypassArgs &args) { int ComputeFPGAConcat(const struct ConcatArgs &args) { #ifdef FPGA_TEST_MODE DLOG << "=============ComputeFpgaConcat==========="; - DLOG << " out_address:" << args.image_out + DLOG << " Image_num: " << args.image_num + << " out_address:" << args.image_out << " out_scale_address:" << args.scale_out; DLOG << " image_height:" << args.height << " image_width:" << args.width; for (int i = 0; i < args.image_num; i++) { @@ -235,7 +240,7 @@ void format_image(framework::Tensor *image_tensor) { auto channel = dims[1], height = dims[2], width = dims[3]; auto data_ptr = image_tensor->data(); size_t memory_size = channel * height * width * sizeof(float); - float *new_data = (float *)fpga_malloc(memory_size); + auto new_data = (float *)fpga_malloc(memory_size); fpga_copy(new_data, data_ptr, memory_size); image::format_image(&new_data, channel, height, width); image_tensor->reset_data_ptr(new_data); @@ -346,12 +351,12 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, auto out_ptr = out->data(); arg->group_num = (uint32_t)group_num; - arg->split_num = (uint32_t)fpga::get_plit_num(filter); + // Either group_num or split_num = 1; + arg->split_num = group_num == 1 ? 
(uint32_t)get_plit_num(filter) : 1; arg->filter_num = (uint32_t)filter->dims()[0]; arg->output.address = out_ptr; arg->output.scale_address = out->scale; - arg->conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(arg->split_num * - sizeof(fpga::ConvArgs)); + arg->conv_args = (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs)); arg->concat_arg.image_num = arg->split_num; arg->concat_arg.image_out = out_ptr; @@ -360,15 +365,14 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, arg->concat_arg.width = (uint32_t)filter->dims()[3]; int n = arg->split_num; - arg->concat_arg.images_in = (half **)fpga::fpga_malloc(n * sizeof(int *)); - arg->concat_arg.scales_in = (float **)fpga::fpga_malloc(n * sizeof(float *)); - arg->concat_arg.channel_num = - (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t)); + arg->concat_arg.images_in = (half **)fpga_malloc(n * sizeof(int *)); + arg->concat_arg.scales_in = (float **)fpga_malloc(n * sizeof(float *)); + arg->concat_arg.channel_num = (uint32_t *)fpga_malloc(n * sizeof(uint32_t)); arg->concat_arg.image_out = out_ptr; auto channel = (int)out->dims()[1]; - int filter_num_per_div = fpga::get_filter_num_per_div(filter, group_num); - int element_num = fpga::get_aligned_filter_element_num( + int filter_num_per_div = get_filter_num_per_div(filter, group_num); + int element_num = get_aligned_filter_element_num( filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); for (int i = 0; i < n; i++) { @@ -390,16 +394,17 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, &((int8_t *)filter_ptr)[i * element_num * filter_num_per_div]; arg->conv_args[i].sb_address = &bs_ptr[i * filter_num_per_div * 2]; arg->conv_args[i].filter_num = - (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num( - channel - (n - 1) * filter_num_per_div) + (uint32_t)(i == n - 1 ? 
channel - (n - 1) * filter_num_per_div : filter_num_per_div); if (n > 1) { arg->conv_args[i].output.scale_address = - (float *)fpga::fpga_malloc(2 * sizeof(float)); - arg->conv_args[i].output.address = - fpga::fpga_malloc(input->dims()[2] * input->dims()[3] * - arg->conv_args[i].filter_num * sizeof(half)); + (float *)fpga_malloc(2 * sizeof(float)); + arg->conv_args[i].output.address = fpga_malloc( + input->dims()[2] * + align_to_x(input->dims()[3] * arg->conv_args[i].filter_num, + IMAGE_ALIGNMENT) * + sizeof(half)); } else { @@ -408,7 +413,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, } arg->concat_arg.images_in[i] = (half *)arg->conv_args[i].output.address; - arg->concat_arg.scales_in[i] = (float *)arg->conv_args[i].sb_address; + arg->concat_arg.scales_in[i] = arg->conv_args[i].output.scale_address; arg->concat_arg.channel_num[i] = arg->conv_args[i].filter_num; } } diff --git a/src/io/executor.cpp b/src/io/executor.cpp index c12f1ce02c8ab32d04d00d76cad5dc7d6ce45bc2..c8d8f52a427bb1ee2b9fa04c9ef09f8e626f11b0 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -79,7 +79,7 @@ Executor::Executor(const framework::Program p, int batch_size, std::vector> ops = block_desc->Ops(); for (int j = 0; j < ops.size(); ++j) { std::shared_ptr op = ops[j]; - DLOG << "create op: " << op->Type(); + DLOG << "create op: " << j << " " << op->Type(); auto op_base = framework::OpRegistry::CreateOp( op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), program_.scope); @@ -103,7 +103,9 @@ Executor::Executor(const framework::Program p, int batch_size, std::shared_ptr to_predict_block = to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; + int i = 0; for (const auto &op : ops) { + DLOG << "Init op: " << i++ << " " << op->Type(); op->Init(); } } @@ -231,6 +233,13 @@ void Executor::InitMemory() { Get_binary_data(program_.model_path + "/" + var_desc->Name()); char *data = origin_data; LoadMemory(*var_desc, tensor, &data); + + // DLOG << "----- " << var_desc->Name(); + // DLOG << "----- " << tensor->dims(); + // float *pDouble = tensor->template data(); + // for (int i = 0; i < tensor->numel() && i < 30; ++i) { + // std::cout << pDouble[i] << std::endl; + // } delete origin_data; } else { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { @@ -695,6 +704,7 @@ void Executor::Predict_From_To(int start, int end) { clock_gettime(CLOCK_MONOTONIC, &ts); profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif + DLOG << "Running op: " << i << " " << ops[i]->Type(); ops[i]->Run(); #ifdef PADDLE_MOBILE_PROFILE diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 2cc7fda7f8a6bb6f6856a937b9e14ab9792224e1..cccd4f52ebdc368e4f68eaf9dc3f25ee3693fdd2 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -53,7 +53,7 @@ class FeedOp : public framework::OperatorBase { auto input_ptr = input->data(); fpga::format_image(input); Tensor *output = param_.Out(); - auto output_ptr = output->data(); + auto output_ptr = output->data(); fpga::BypassArgs args = {fpga::DATA_TYPE_FP32}; diff --git a/src/operators/kernel/central-arm-func/conv_add_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_arm_func.h index 643ee84529e01aebc33a144b4c7a8181ff39a1c9..d71bc235977236fbd0dd332df556ea4bd41eacf4 100644 --- a/src/operators/kernel/central-arm-func/conv_add_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_add_arm_func.h @@ -129,10 +129,13 @@ void ConvAddCompute(const FusionConvAddParam ¶m) { // 
param.Paddings(), // param.Filter(), param.Bias(), // param.Output(), false); - - math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), param.Output(), + if (param.Paddings()[0] == 0) { + math::DepthwiseConv3x3s2p0(param.Input(), param.Filter(), param.Output(), *param.Bias(), true); - + } else { + math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), + param.Output(), *param.Bias(), true); + } } else { ConvAddBasic(param); } diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index 79f1453fc8e77e35b52a5617064c164d93aa9207..7cfd0c7d76c1a8e73955dbec1971d86ceebde259 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -26,7 +26,8 @@ template <> bool SoftmaxKernel::Init(SoftmaxParam *param) { auto input = const_cast(param->InputX()); auto input_ptr = input->data(); - auto float_input = new Tensor(*input); + auto float_input = new Tensor; + float_input->mutable_data(input->dims()); fpga::format_fp32_ofm(float_input); fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index 402b187f8f5e9d2fbb70fa6bcfb72c88aa53e3d3..716256a376a50f2ec1c4c62fa25703cabf3a0c66 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -1881,6 +1881,103 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, #endif } +void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, + Tensor *output, Tensor bias, bool if_bias) { +#if __ARM_NEON + + const int batch_size = static_cast(input->dims()[0]); + const int input_channel = static_cast(input->dims()[1]); + + const int input_height = static_cast(input->dims()[2]); + const int input_width = static_cast(input->dims()[3]); + const int output_height = static_cast(output->dims()[2]); + const int output_width = static_cast(output->dims()[3]); + const int inhxw = input_height * input_width; + const int outhxw = output_height * output_width; + + float32x4_t zero = vdupq_n_f32(0.0); + for (int b = 0; b < batch_size; b++) { +#pragma omp parallel for + for (int c = 0; c < input_channel; c++) { + const float *filter_data = filter->data() + c * 9; + const float *input_data = input->data() + c * inhxw; + const float *bias_data = bias.data() + c; + float *output_data = output->data() + c * outhxw; + float w00 = filter_data[0]; + float w01 = filter_data[1]; + float w02 = filter_data[2]; + float w10 = filter_data[3]; + float w11 = filter_data[4]; + float w12 = filter_data[5]; + float w20 = filter_data[6]; + float w21 = filter_data[7]; + float w22 = filter_data[8]; + + float32x4_t biasv = vld1q_dup_f32(bias_data); + + for (int i = 0; i < output_height; i += 1) { + for (int m = 0; m < output_width - 2; m += 3) { + float *output_ptr = output_data + i * output_width + m; + float32x4x2_t input_buff_top{}, input_buff_mid{}, input_buff_bottom{}; + float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3, + tmp4, tmp5, out0; + input_buff_top = + vld2q_f32(input_data + (2 * i) * input_width + (2 * m)); + input_buff_mid = + vld2q_f32(input_data + (2 * i + 1) * input_width + (2 * m)); + input_buff_bottom = + vld2q_f32(input_data + (2 * i + 2) * input_width + (2 * m)); + + in0 = input_buff_top.val[0]; + tmp0 = input_buff_top.val[1]; + tmp1 = vextq_f32(in0, zero, 1); + + in2 = input_buff_mid.val[0]; + tmp2 = input_buff_mid.val[1]; + tmp3 = vextq_f32(in2, zero, 1); + + in4 = 
input_buff_bottom.val[0]; + tmp4 = input_buff_bottom.val[1]; + tmp5 = vextq_f32(in4, zero, 1); + + out0 = vmulq_n_f32(in0, w00); + out0 = vmlaq_n_f32(out0, tmp0, w01); + out0 = vmlaq_n_f32(out0, tmp1, w02); + out0 = vmlaq_n_f32(out0, in2, w10); + out0 = vmlaq_n_f32(out0, tmp2, w11); + out0 = vmlaq_n_f32(out0, tmp3, w12); + out0 = vmlaq_n_f32(out0, in4, w20); + out0 = vmlaq_n_f32(out0, tmp4, w21); + out0 = vmlaq_n_f32(out0, tmp5, w22); + out0 = vaddq_f32(out0, biasv); + + vst1q_lane_f32(output_ptr, out0, 0); + vst1q_lane_f32(output_ptr + 1, out0, 1); + vst1q_lane_f32(output_ptr + 2, out0, 2); + } + int m; + for (m = 0; m < output_width - 2; m += 3) { + } + for (int j = m; j < output_width; j++) { + output_data[i * output_width + j] = + input_data[(2 * i - 1) * input_width + 2 * j - 1] * w00 + + input_data[(2 * i - 1) * input_width + 2 * j] * w01 + + input_data[(2 * i - 1) * input_width + 2 * j + 1] * w02 + + input_data[(2 * i) * input_width + 2 * j - 1] * w10 + + input_data[(2 * i) * input_width + 2 * j] * w11 + + input_data[(2 * i) * input_width + 2 * j + 1] * w12 + + input_data[(2 * i + 1) * input_width + 2 * j - 1] * w20 + + input_data[(2 * i + 1) * input_width + 2 * j] * w21 + + input_data[(2 * i + 1) * input_width + 2 * j + 1] * w22; + output_data[i * output_width + j] += *bias_data; + } + } + } + } + +#endif +} + } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/depthwise_conv_3x3.h b/src/operators/math/depthwise_conv_3x3.h index 60e979648f871e640924a3373c625c311c3dd067..b146b88e737a07ea08250315fc94653f63d2ad05 100644 --- a/src/operators/math/depthwise_conv_3x3.h +++ b/src/operators/math/depthwise_conv_3x3.h @@ -43,6 +43,9 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu); + +void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, + Tensor *output, Tensor bias, bool if_bias); } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/op_param.h b/src/operators/op_param.h index f11e6c37cd9d1aa3625240619e0e31ea6f4d5a0d..5b53743b75bfe65a9e029e44114b339603388c08 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -341,22 +341,23 @@ class OpParam { } }; -#ifdef CONV_OP template -class ConvParam : OpParam { +class ConvParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, const Scope &scope) { - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutputFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); + filter_ = OpParam::FilterFrom(inputs, scope); + input_ = OpParam::InputFrom(inputs, scope); + if (outputs.count("Output")) { + output_ = OpParam::OutputFrom(outputs, scope); + } + strides_ = OpParam::GetAttr>("strides", attrs); + paddings_ = OpParam::GetAttr>("paddings", attrs); + dilations_ = OpParam::GetAttr>("dilations", attrs); + groups = OpParam::GetAttr("groups", attrs); } const RType *Input() const { return input_; } @@ -384,7 +385,6 @@ class ConvParam : OpParam { }; template Print &operator<<(Print &printer, const ConvParam 
&conv_param); -#endif template class ElementwiseAddParam : OpParam { @@ -1294,52 +1294,29 @@ using FusionFcReluParam = FusionFcParam; #endif template -class FusionConvAddParam : public OpParam { +class FusionConvAddParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, - const Scope &scope) { - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); + const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + bias_ = OpParam::InputYFrom(inputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + output_ = OpParam::OutFrom(outputs, scope); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - protected: RType *bias_; int axis_; - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; #ifdef PADDLE_MOBILE_FPGA private: @@ -1366,58 +1343,33 @@ class FusionConvAddReluParam : public FusionConvAddParam { #endif #ifdef FUSION_CONVADDPRELU_OP -template -class FusionConvAddPReluParam : public OpParam { - typedef typename DtypeTensorTrait::gtype GType; - typedef typename DtypeTensorTrait::rtype RType; +template +class FusionConvAddPReluParam : public ConvParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddPReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - alpha_ = InputAlphaFrom(inputs, scope); - mode_ = GetAttr("mode", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + alpha_ = OpParam::InputAlphaFrom(inputs, scope); + mode_ = OpParam::GetAttr("mode", attrs); framework::DDim dims = alpha_->dims(); - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); + bias_ = OpParam::InputYFrom(inputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + output_ = OpParam::OutFrom(outputs, scope); } const RType *InputAlpha() const { return alpha_; } const std::string &Mode() const { return mode_; } RType *Bias() const { return bias_; } - const int &Axis() const { return axis_; } - - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - 
- const int &Groups() const { return groups; } - protected: RType *bias_; int axis_; - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *alpha_; std::string mode_; #ifdef PADDLE_MOBILE_FPGA @@ -1433,35 +1385,30 @@ class FusionConvAddPReluParam : public OpParam { #endif #ifdef FUSION_CONVADDADDPRELU_OP -template -class FusionConvAddAddPReluParam : public OpParam { - typedef typename DtypeTensorTrait::gtype GType; - typedef typename DtypeTensorTrait::rtype RType; +template +class FusionConvAddAddPReluParam : public ConvParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddAddPReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - bias1_ = InputYFrom1(inputs, scope); - alpha_ = InputAlphaFrom(inputs, scope); - mode_ = GetAttr("mode", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + bias1_ = OpParam::InputYFrom1(inputs, scope); + alpha_ = OpParam::InputAlphaFrom(inputs, scope); + mode_ = OpParam::GetAttr("mode", attrs); framework::DDim dims = alpha_->dims(); - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - keyOutput_ = getkey("addOut", inputs, 0); - keyX1_ = getkey("addX", inputs, 1); - keyY1_ = getkey("Y", inputs, 1); + bias_ = OpParam::InputYFrom(inputs, scope); + output_ = OpParam::OutFrom(outputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + keyOutput_ = OpParam::getkey("addOut", inputs, 0); + keyX1_ = OpParam::getkey("addX", inputs, 1); + keyY1_ = OpParam::getkey("Y", inputs, 1); if (keyX1_ == keyOutput_) { - bias1_ = InputYFrom1(inputs, scope); + bias1_ = OpParam::InputYFrom1(inputs, scope); } else if (keyY1_ == keyOutput_) { - bias1_ = InputXFrom1(inputs, scope); + bias1_ = OpParam::InputXFrom1(inputs, scope); } } const RType *InputAlpha() const { return alpha_; } @@ -1471,31 +1418,12 @@ class FusionConvAddAddPReluParam : public OpParam { RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } - - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - protected: RType *bias_; int axis_; - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *alpha_; std::string mode_; RType *bias1_; @@ -1516,49 +1444,32 @@ class FusionConvAddAddPReluParam : public OpParam { #ifdef FUSION_CONVADDBNRELU_OP template -class FusionConvAddBNReluParam : public OpParam { +class FusionConvAddBNReluParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddBNReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", 
attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - // is_test_ = GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + bias_ = OpParam::InputYFrom(inputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + output_ = OpParam::OutFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1584,13 +1495,7 @@ class FusionConvAddBNReluParam : public OpParam { protected: RType *bias_; int axis_; - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1614,57 +1519,40 @@ class FusionConvAddBNReluParam : public OpParam { #ifdef FUSION_CONVBNADDRELU_OP template -class FusionConvBNAddReluParam : public OpParam { +class FusionConvBNAddReluParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNAddReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - keyBNY_ = getkey("BNY", inputs, 0); - keyX_ = getkey("X", inputs, 0); - keyY_ = getkey("Y", inputs, 0); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + bias_ = OpParam::InputYFrom(inputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + output_ = OpParam::OutFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + 
input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + keyBNY_ = OpParam::getkey("BNY", inputs, 0); + keyX_ = OpParam::getkey("X", inputs, 0); + keyY_ = OpParam::getkey("Y", inputs, 0); if (keyX_ == keyBNY_) { - bias_ = InputYFrom(inputs, scope); + bias_ = OpParam::InputYFrom(inputs, scope); } else if (keyY_ == keyBNY_) { - bias_ = InputXFrom(inputs, scope); + bias_ = OpParam::InputXFrom(inputs, scope); } - // is_test_ = GetAttr("is_test", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1690,13 +1578,7 @@ class FusionConvBNAddReluParam : public OpParam { protected: RType *bias_; int axis_; - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1723,44 +1605,26 @@ class FusionConvBNAddReluParam : public OpParam { #ifdef FUSION_CONVBN_OP template -class FusionConvBNParam : public OpParam { +class FusionConvBNParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, - const Scope &scope) { - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_y_ = OutputYFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - // is_test_ = GetAttr("is_test", attrs); + const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + output_y_ = OpParam::OutputYFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } - - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_y_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType 
*InputMean() const { return input_mean_; } @@ -1784,13 +1648,7 @@ class FusionConvBNParam : public OpParam { const RType *NewBias() const { return new_bias_; } protected: - RType *input_; RType *output_y_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1814,49 +1672,32 @@ class FusionConvBNParam : public OpParam { #ifdef FUSION_CONVADDBN_OP template -class FusionConvAddBNParam : public OpParam { +class FusionConvAddBNParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddBNParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - bias_ = InputYFrom(inputs, scope); - axis_ = GetAttr("axis", attrs); - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_y_ = OutputYFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - // is_test_ = GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + bias_ = OpParam::InputYFrom(inputs, scope); + axis_ = OpParam::GetAttr("axis", attrs); + output_y_ = OpParam::OutputYFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_y_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1882,13 +1723,7 @@ class FusionConvAddBNParam : public OpParam { protected: RType *bias_; int axis_; - RType *input_; RType *output_y_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1912,44 +1747,26 @@ class FusionConvAddBNParam : public OpParam { #ifdef FUSION_DWCONVBNRELU_OP template -class FusionDWConvBNReluParam : public OpParam { +class FusionDWConvBNReluParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionDWConvBNReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - strides_ = GetAttr>("strides", attrs); - 
paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - // is_test_ = GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + output_ = OpParam::OutFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } - - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector &Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1973,13 +1790,7 @@ class FusionDWConvBNReluParam : public OpParam { const RType *NewBias() const { return new_bias_; } protected: - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1995,45 +1806,26 @@ class FusionDWConvBNReluParam : public OpParam { #ifdef FUSION_CONVBNRELU_OP template -class FusionConvBNReluParam : public OpParam { +class FusionConvBNReluParam : public ConvParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { - filter_ = FilterFrom(inputs, scope); - input_ = InputFrom(inputs, scope); - output_ = OutFrom(outputs, scope); - - strides_ = GetAttr>("strides", attrs); - paddings_ = GetAttr>("paddings", attrs); - dilations_ = GetAttr>("dilations", attrs); - groups = GetAttr("groups", attrs); - input_bias_ = InputBiasFrom(inputs, scope); - input_mean_ = InputMeanFrom(inputs, scope); - input_scale_ = InputScaleFrom(inputs, scope); - input_variance_ = InputVarianceFrom(inputs, scope); - epsilon_ = GetAttr("epsilon", attrs); - momentum_ = GetAttr("momentum", attrs); - // is_test_ = GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) + : ConvParam(inputs, outputs, attrs, scope) { + output_ = OpParam::OutFrom(outputs, scope); + input_bias_ = OpParam::InputBiasFrom(inputs, scope); + input_mean_ = OpParam::InputMeanFrom(inputs, scope); + input_scale_ = OpParam::InputScaleFrom(inputs, scope); + input_variance_ = OpParam::InputVarianceFrom(inputs, scope); + epsilon_ = OpParam::GetAttr("epsilon", attrs); + momentum_ = OpParam::GetAttr("momentum", attrs); + // is_test_ = OpParam::GetAttr("is_test", attrs); } - - const RType *Input() const { return input_; } - - const RType *Filter() const { return filter_; } - RType *Output() const { return output_; } - const vector &Strides() const { return strides_; } - - const vector 
&Paddings() const { return paddings_; } - - const vector &Dilations() const { return dilations_; } - - const int &Groups() const { return groups; } - const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -2057,13 +1849,7 @@ class FusionConvBNReluParam : public OpParam { const RType *NewBias() const { return new_bias_; } protected: - RType *input_; RType *output_; - RType *filter_; - vector strides_; - vector paddings_; - vector dilations_; - int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ef03205ae595ade0765377b1dcc0178471a6553e..f4dc1421a4cd0f1062e8ad1240caa237e58c9371 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,9 @@ elseif ("yolo" IN_LIST NET) # gen test ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-yolo paddle-mobile) + # gen test + ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test_yolo_combined paddle-mobile) elseif ("squeezenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) @@ -30,6 +33,27 @@ elseif("FPGAnets" IN_LIST NET) ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-resnet paddle-mobile) + ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-resnet50 paddle-mobile) + + ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-EW paddle-mobile) + + ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-conv paddle-mobile) + + ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-pooling paddle-mobile) + + ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-bypass paddle-mobile) + + ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-softmax paddle-mobile) + + ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-fpga-concat paddle-mobile) + ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-tensor-quant paddle-mobile) @@ -74,6 +98,10 @@ else () ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-yolo paddle-mobile) + # gen test + ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test_yolo_combined paddle-mobile) + # gen test ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-googlenet paddle-mobile) @@ -235,13 +263,4 @@ else () #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) - - - - endif() - -# if(FPGA) -# ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp 
test_helper.h test_include.h executor_for_test.h)
-# target_link_libraries(test-tensor-quant paddle-mobile)
-# endif()
diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cca6793f10da5a0784cf8a3ba2d0104f3508028d
--- /dev/null
+++ b/test/fpga/test_resnet50.cpp
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+static const char *g_resnet_combine = "../models/resnet50";
+
+int main() {
+  DLOG << paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
+                         std::string(g_resnet_combine) + "/params", true)) {
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+
+    paddle_mobile.FeedData(input_tensor);
+    paddle_mobile.Predict_To(-1);
+    // paddle_mobile.Predict_From(73);
+    // paddle_mobile.Predict_From_To(72, 73);
+
+    DLOG << "Computation done";
+    return 0;
+  }
+}
diff --git a/test/net/test_yolo_combined.cpp b/test/net/test_yolo_combined.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88b889daa946cfaef1d86ff36f416b4643532c89
--- /dev/null
+++ b/test/net/test_yolo_combined.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
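+
+// Loads the combined-format yolo model and runs a single prediction over a
+// prepared 1x3x416x416 NCHW float input (see python/tools/imagetools).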
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  // ../../../test/models/googlenet
+  // ../../../test/models/mobilenet
+  auto time1 = time();
+
+  if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model",
+                         std::string(g_yolo_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<int64_t> dims{1, 3, 416, 416};
+    std::vector<float> input;
+
+    GetInput<float>(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims);
+    std::cout << "input.size(): " << input.size() << std::endl;
+    for (int j = 0; j < 100; ++j) {
+      std::cout << j << " : " << input[j] << std::endl;
+    }
+    // // warm up with ten runs first
+    // for (int i = 0; i < 10; ++i) {
+    //   paddle_mobile.Predict(input, dims);
+    // }
+    auto time3 = time();
+    const std::vector<float> vector_out = paddle_mobile.Predict(input, dims);
+    std::cout << "--------------------------------------------" << std::endl;
+
+    for (float i : vector_out) {
+      std::cout << i << std::endl;
+    }
+
+    std::cout << "--------------------------------------------" << std::endl;
+
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_helper.h b/test/test_helper.h
index a6898b2cc8d55c79e5bd82d661bbe07533e2ec4f..ecbc251a815e343f75b1247ffc430e9c52d6abfd 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -41,12 +41,15 @@ static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";
 static const char *g_googlenet_combine = "../models/googlenet_combine";
 static const char *g_yolo = "../models/yolo";
+static const char *g_yolo_combined = "../models/yolo_combined";
 static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
 static const char *g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
 static const char *g_test_image_1x3x224x224_banana =
     "../images/input_3x224x224_banana";
+static const char *g_test_image_desktop_1_3_416_416_nchw_float =
+    "../images/in_put_1_3_416_416_2";
 static const char *g_hand = "../images/hand_image";
 static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
 static const char *g_imgfssd_ar1 = "../images/003_0001.txt";