From 607a1f48caca9716582609c1db57b3837a043c12 Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 2 Nov 2018 14:33:07 +0800 Subject: [PATCH] Refactor: move all files in kernels directory to ops and remove kernels. 1. Move all files in kernels to ops 2. unify the op_def_registry and op_registry. 3. Support op clip: do not link the ops' code when the op is not registered. --- .gitlab-ci.yml | 1 - .travis.yml | 2 - docs/development/adding_a_new_op.md | 75 ++-- docs/user_guide/advanced_usage.rst | 63 ++- mace/core/allocator.h | 3 +- mace/core/net.cc | 20 +- mace/core/net.h | 5 +- mace/core/op_def_registry.cc | 77 ---- mace/core/op_def_registry.h | 81 ---- mace/core/operator.cc | 84 +++- mace/core/operator.h | 71 ++-- mace/core/registry.h | 97 ----- mace/core/tensor.h | 5 +- mace/kernels/BUILD | 150 ------- mace/kernels/matmul_benchmark.cc | 289 -------------- mace/libmace/BUILD | 5 +- mace/libmace/mace.cc | 9 +- mace/ops/BUILD | 132 +++++-- mace/{kernels => ops}/activation.cc | 12 +- mace/{kernels => ops}/activation.h | 12 +- mace/ops/activation_benchmark.cc | 13 +- mace/ops/activation_test.cc | 31 +- mace/{kernels => ops}/addn.cc | 6 +- mace/ops/addn_benchmark.cc | 3 +- mace/ops/addn_test.cc | 9 +- mace/{kernels => ops}/argmax.cc | 4 +- mace/ops/argmax_test.cc | 1 - mace/{kernels => ops}/arm/activation_neon.cc | 6 +- mace/{kernels => ops}/arm/activation_neon.h | 10 +- mace/{kernels => ops}/arm/conv_2d_neon.h | 12 +- .../{kernels => ops}/arm/conv_2d_neon_15x1.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_1x1.cc | 6 +- .../{kernels => ops}/arm/conv_2d_neon_1x15.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_1x7.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_3x3.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_5x5.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_7x1.cc | 6 +- mace/{kernels => ops}/arm/conv_2d_neon_7x7.cc | 6 +- mace/{kernels => ops}/arm/conv_winograd.cc | 8 +- mace/{kernels => ops}/arm/conv_winograd.h | 12 +- .../arm/conv_winograd_test.cc | 10 +- 
mace/{kernels => ops}/arm/deconv_2d_neon.h | 10 +- .../arm/deconv_2d_neon_3x3.cc | 6 +- .../arm/deconv_2d_neon_4x4.cc | 6 +- .../arm/depthwise_conv2d_neon.h | 10 +- .../arm/depthwise_conv2d_neon_3x3.cc | 6 +- mace/{kernels => ops}/batch_norm.cc | 12 +- mace/ops/batch_norm_benchmark.cc | 11 +- mace/ops/batch_norm_test.cc | 61 ++- mace/{kernels => ops}/batch_to_space.cc | 6 +- mace/ops/batch_to_space_benchmark.cc | 3 +- mace/{kernels => ops}/bias_add.cc | 8 +- mace/ops/bias_add_benchmark.cc | 5 +- mace/ops/bias_add_test.cc | 19 +- .../buffer_inverse_transform.cc | 14 +- mace/ops/buffer_to_image_benchmark.cc | 1 - mace/ops/buffer_to_image_test.cc | 46 +-- mace/{kernels => ops}/buffer_transform.cc | 14 +- mace/ops/buffer_transform_test.cc | 6 +- mace/{kernels => ops}/cast.cc | 4 +- mace/ops/cast_test.cc | 1 - mace/{kernels => ops}/channel_shuffle.cc | 6 +- mace/ops/channel_shuffle_benchmark.cc | 3 +- mace/ops/channel_shuffle_test.cc | 5 +- mace/{kernels => ops}/concat.cc | 9 +- mace/ops/concat_benchmark.cc | 5 +- mace/ops/concat_test.cc | 4 +- mace/{kernels => ops}/conv_2d.cc | 26 +- mace/ops/conv_2d_benchmark.cc | 9 +- mace/ops/conv_2d_test.cc | 94 ++--- mace/{kernels => ops}/conv_pool_2d_base.h | 12 +- mace/{kernels => ops}/conv_pool_2d_util.cc | 6 +- mace/{kernels => ops}/conv_pool_2d_util.h | 10 +- mace/ops/core_test.cc | 12 +- mace/{kernels => ops}/crop.cc | 6 +- mace/ops/crop_benchmark.cc | 5 +- mace/ops/crop_test.cc | 7 +- mace/{kernels => ops}/deconv_2d.cc | 22 +- mace/{kernels => ops}/deconv_2d.h | 10 +- mace/ops/deconv_2d_benchmark.cc | 9 +- mace/ops/deconv_2d_test.cc | 62 +-- mace/{kernels => ops}/depth_to_space.cc | 6 +- mace/ops/depth_to_space_benchmark.cc | 3 +- mace/ops/depth_to_space_test.cc | 9 +- mace/{kernels => ops}/depthwise_conv2d.cc | 16 +- mace/ops/depthwise_conv2d_benchmark.cc | 9 +- mace/ops/depthwise_conv2d_test.cc | 26 +- mace/{kernels => ops}/eltwise.cc | 20 +- mace/{kernels => ops}/eltwise.h | 10 +- mace/ops/eltwise_benchmark.cc | 11 +- 
mace/ops/eltwise_test.cc | 343 ++++++++-------- mace/{kernels => ops}/expand_dims.cc | 4 +- mace/ops/expand_dims_test.cc | 1 - mace/{kernels => ops}/fill.cc | 4 +- mace/ops/fill_test.cc | 1 - mace/{kernels => ops}/fixpoint.h | 10 +- mace/{kernels => ops}/fixpoint_test.cc | 6 +- mace/ops/folded_batch_norm_test.cc | 41 +- mace/{kernels => ops}/fully_connected.cc | 14 +- mace/ops/fully_connected_benchmark.cc | 7 +- mace/ops/fully_connected_test.cc | 17 +- mace/{kernels => ops}/gather.cc | 4 +- mace/ops/gather_benchmark.cc | 1 - mace/ops/gather_test.cc | 1 - mace/{kernels => ops}/gemm.cc | 6 +- mace/{kernels => ops}/gemm.h | 10 +- mace/{kernels => ops}/gemm_test.cc | 38 +- mace/{kernels => ops}/gemmlowp_util.h | 6 +- mace/{kernels => ops}/identity.cc | 4 +- mace/ops/identity_test.cc | 1 - mace/{kernels => ops}/infer_conv2d_shape.cc | 6 +- mace/ops/infer_conv2d_shape_test.cc | 3 +- mace/{kernels => ops}/local_response_norm.cc | 4 +- mace/ops/local_response_norm_benchmark.cc | 1 - mace/ops/local_response_norm_test.cc | 1 - mace/{kernels => ops}/lstm_cell.cc | 6 +- mace/ops/lstmcell_benchmark.cc | 12 +- mace/ops/lstmcell_test.cc | 17 +- mace/ops/lstmcell_test_util.h | 13 +- mace/{kernels => ops}/matmul.cc | 12 +- mace/ops/matmul_benchmark.cc | 292 +++++++++++++- mace/ops/matmul_test.cc | 13 +- mace/{kernels => ops}/memory_benchmark.cc | 4 +- mace/{kernels => ops}/opencl/activation.h | 10 +- mace/{kernels => ops}/opencl/addn.h | 10 +- mace/{kernels => ops}/opencl/batch_norm.h | 10 +- mace/{kernels => ops}/opencl/batch_to_space.h | 10 +- mace/{kernels => ops}/opencl/bias_add.h | 10 +- .../opencl/buffer/buffer_inverse_transform.h | 14 +- .../opencl/buffer/buffer_transform.cc | 6 +- .../opencl/buffer/buffer_transform.h | 14 +- .../opencl/buffer/buffer_type_transform.cc | 6 +- mace/{kernels => ops}/opencl/buffer/conv_2d.h | 18 +- .../opencl/buffer/conv_2d_1x1.cc | 8 +- .../opencl/buffer/conv_2d_general.cc | 8 +- .../opencl/buffer/depthwise_conv2d.cc | 6 +- 
.../opencl/buffer/depthwise_conv2d.h | 18 +- mace/{kernels => ops}/opencl/buffer/pooling.h | 18 +- mace/{kernels => ops}/opencl/buffer/softmax.h | 14 +- mace/{kernels => ops}/opencl/buffer/utils.cc | 8 +- mace/{kernels => ops}/opencl/buffer/utils.h | 10 +- .../opencl/buffer_inverse_transform.h | 12 +- .../opencl/buffer_transform.h | 12 +- .../{kernels => ops}/opencl/channel_shuffle.h | 10 +- mace/{kernels => ops}/opencl/cl/activation.cl | 0 mace/{kernels => ops}/opencl/cl/addn.cl | 0 mace/{kernels => ops}/opencl/cl/batch_norm.cl | 0 .../opencl/cl/batch_to_space.cl | 0 mace/{kernels => ops}/opencl/cl/bias_add.cl | 0 .../opencl/cl/buffer_to_image.cl | 0 .../opencl/cl/buffer_transform.cl | 0 .../opencl/cl/channel_shuffle.cl | 0 mace/{kernels => ops}/opencl/cl/common.h | 6 +- mace/{kernels => ops}/opencl/cl/concat.cl | 0 mace/{kernels => ops}/opencl/cl/conv_2d.cl | 0 .../{kernels => ops}/opencl/cl/conv_2d_1x1.cl | 0 .../opencl/cl/conv_2d_1x1_buffer.cl | 0 .../{kernels => ops}/opencl/cl/conv_2d_3x3.cl | 0 .../opencl/cl/conv_2d_buffer.cl | 0 mace/{kernels => ops}/opencl/cl/crop.cl | 0 mace/{kernels => ops}/opencl/cl/deconv_2d.cl | 0 .../opencl/cl/depth_to_space.cl | 0 .../opencl/cl/depthwise_conv2d.cl | 0 .../opencl/cl/depthwise_conv2d_buffer.cl | 0 mace/{kernels => ops}/opencl/cl/eltwise.cl | 0 .../opencl/cl/fully_connected.cl | 0 mace/{kernels => ops}/opencl/cl/lstmcell.cl | 0 mace/{kernels => ops}/opencl/cl/matmul.cl | 0 mace/{kernels => ops}/opencl/cl/pad.cl | 0 mace/{kernels => ops}/opencl/cl/pooling.cl | 0 .../opencl/cl/pooling_buffer.cl | 0 .../{kernels => ops}/opencl/cl/reduce_mean.cl | 0 .../opencl/cl/resize_bicubic.cl | 0 .../opencl/cl/resize_bilinear.cl | 0 mace/{kernels => ops}/opencl/cl/softmax.cl | 0 .../opencl/cl/softmax_buffer.cl | 0 .../opencl/cl/space_to_batch.cl | 0 .../opencl/cl/space_to_depth.cl | 0 mace/{kernels => ops}/opencl/cl/split.cl | 0 .../opencl/cl/sqrdiff_mean.cl | 0 .../opencl/cl/winograd_transform.cl | 0 mace/{kernels => 
ops}/opencl/common.h | 10 +- mace/{kernels => ops}/opencl/concat.h | 10 +- mace/{kernels => ops}/opencl/conv_2d.h | 14 +- mace/{kernels => ops}/opencl/crop.h | 10 +- mace/{kernels => ops}/opencl/deconv_2d.h | 12 +- mace/{kernels => ops}/opencl/depth_to_space.h | 10 +- .../opencl/depthwise_conv2d.h | 14 +- mace/{kernels => ops}/opencl/eltwise.h | 10 +- .../{kernels => ops}/opencl/fully_connected.h | 12 +- mace/{kernels => ops}/opencl/helper.cc | 6 +- mace/{kernels => ops}/opencl/helper.h | 40 +- .../opencl/image/activation.h | 16 +- mace/{kernels => ops}/opencl/image/addn.h | 14 +- .../opencl/image/batch_norm.h | 16 +- .../opencl/image/batch_to_space.h | 14 +- mace/{kernels => ops}/opencl/image/bias_add.h | 14 +- .../opencl/image/buffer_to_image.h | 14 +- .../opencl/image/channel_shuffle.h | 14 +- mace/{kernels => ops}/opencl/image/concat.cc | 6 +- mace/{kernels => ops}/opencl/image/concat.h | 14 +- mace/{kernels => ops}/opencl/image/conv_2d.h | 16 +- .../opencl/image/conv_2d_1x1.cc | 8 +- .../opencl/image/conv_2d_3x3.cc | 8 +- .../opencl/image/conv_2d_general.cc | 8 +- mace/{kernels => ops}/opencl/image/crop.h | 14 +- .../{kernels => ops}/opencl/image/deconv_2d.h | 14 +- .../opencl/image/depth_to_space.h | 14 +- .../opencl/image/depthwise_conv2d.cc | 6 +- .../opencl/image/depthwise_conv2d.h | 16 +- mace/{kernels => ops}/opencl/image/eltwise.h | 16 +- .../opencl/image/fully_connected.h | 14 +- .../opencl/image/image_to_buffer.h | 14 +- .../{kernels => ops}/opencl/image/lstm_cell.h | 14 +- mace/{kernels => ops}/opencl/image/matmul.h | 14 +- mace/{kernels => ops}/opencl/image/pad.h | 14 +- mace/{kernels => ops}/opencl/image/pooling.h | 16 +- .../opencl/image/reduce_mean.h | 14 +- .../opencl/image/resize_bicubic.h | 22 +- .../opencl/image/resize_bilinear.h | 20 +- mace/{kernels => ops}/opencl/image/softmax.h | 14 +- .../opencl/image/space_to_batch.h | 14 +- .../opencl/image/space_to_depth.h | 14 +- mace/{kernels => ops}/opencl/image/split.h | 14 +- 
.../opencl/image/sqrdiff_mean.h | 14 +- .../opencl/image/winograd_transform.h | 20 +- mace/{kernels => ops}/opencl/lstm_cell.h | 10 +- mace/{kernels => ops}/opencl/matmul.h | 10 +- .../opencl/out_of_range_check_test.cc | 6 +- mace/{kernels => ops}/opencl/pad.h | 10 +- mace/{kernels => ops}/opencl/pooling.h | 14 +- mace/{kernels => ops}/opencl/reduce_mean.h | 10 +- mace/{kernels => ops}/opencl/resize_bicubic.h | 10 +- .../{kernels => ops}/opencl/resize_bilinear.h | 10 +- mace/{kernels => ops}/opencl/softmax.h | 10 +- mace/{kernels => ops}/opencl/space_to_batch.h | 10 +- mace/{kernels => ops}/opencl/space_to_depth.h | 10 +- mace/{kernels => ops}/opencl/split.h | 10 +- mace/{kernels => ops}/opencl/sqrdiff_mean.h | 10 +- .../opencl/winograd_transform.h | 10 +- mace/ops/ops_def_register.cc | 373 ------------------ mace/ops/ops_def_register.h | 30 -- .../ops_register.cc => ops/ops_registry.cc} | 108 ++--- .../ops_register.h => ops/ops_registry.h} | 6 +- mace/ops/ops_test_util.h | 15 +- mace/{kernels => ops}/pad.cc | 6 +- mace/ops/pad_benchmark.cc | 4 +- mace/ops/pad_test.cc | 9 +- mace/{kernels => ops}/pooling.cc | 16 +- mace/{kernels => ops}/pooling.h | 6 +- mace/ops/pooling_benchmark.cc | 7 +- mace/ops/pooling_test.cc | 23 +- mace/{kernels => ops}/quantize.cc | 4 +- mace/ops/quantize_test.cc | 1 - mace/{kernels => ops}/reduce_mean.cc | 6 +- mace/ops/reduce_mean_benchmark.cc | 4 +- mace/ops/reduce_mean_test.cc | 9 +- mace/{kernels => ops}/reshape.cc | 4 +- mace/ops/reshape_test.cc | 1 - mace/{kernels => ops}/resize_bicubic.cc | 8 +- mace/{kernels => ops}/resize_bicubic.h | 10 +- mace/ops/resize_bicubic_benchmark.cc | 3 +- mace/ops/resize_bicubic_test.cc | 5 +- mace/{kernels => ops}/resize_bilinear.cc | 8 +- mace/{kernels => ops}/resize_bilinear.h | 10 +- mace/ops/resize_bilinear_benchmark.cc | 3 +- mace/ops/resize_bilinear_test.cc | 5 +- mace/{kernels => ops}/reverse.cc | 4 +- mace/ops/reverse_benchmark.cc | 2 - mace/ops/reverse_test.cc | 1 - mace/{kernels => 
ops}/scalar_math.cc | 10 +- mace/ops/scalar_math_test.cc | 51 ++- mace/{kernels => ops}/sgemm.cc | 14 +- mace/{kernels => ops}/sgemm.h | 10 +- mace/{kernels => ops}/sgemm_pack_test.cc | 6 +- mace/{kernels => ops}/shape.cc | 4 +- mace/ops/shape_test.cc | 1 - mace/{kernels => ops}/softmax.cc | 12 +- mace/ops/softmax_benchmark.cc | 3 +- mace/ops/softmax_test.cc | 9 +- mace/{kernels => ops}/space_to_batch.cc | 6 +- mace/ops/space_to_batch_benchmark.cc | 3 +- mace/ops/space_to_batch_test.cc | 16 +- mace/{kernels => ops}/space_to_depth.cc | 6 +- mace/ops/space_to_depth_benchmark.cc | 3 +- mace/ops/space_to_depth_test.cc | 9 +- mace/{kernels => ops}/split.cc | 6 +- mace/ops/split_benchmark.cc | 3 +- mace/ops/split_test.cc | 4 +- mace/{kernels => ops}/sqrdiff_mean.cc | 6 +- mace/ops/sqrdiff_mean_benchmark.cc | 4 +- mace/ops/sqrdiff_mean_test.cc | 12 +- mace/{kernels => ops}/squeeze.cc | 4 +- mace/ops/squeeze_test.cc | 1 - mace/{kernels => ops}/stack.cc | 4 +- mace/ops/stack_test.cc | 1 - mace/{kernels => ops}/strided_slice.cc | 4 +- mace/ops/strided_slice_test.cc | 1 - mace/{kernels => ops}/transpose.cc | 4 +- mace/ops/transpose_benchmark.cc | 1 - mace/ops/transpose_test.cc | 1 - mace/{kernels => ops}/unstack.cc | 4 +- mace/ops/unstack_test.cc | 1 - mace/ops/winograd_convolution_benchmark.cc | 11 +- mace/ops/winograd_convolution_test.cc | 27 +- mace/{kernels => ops}/winograd_transform.cc | 12 +- mace/ops/winograd_transform_benchmark.cc | 11 +- mace/proto/mace.proto | 1 - mace/python/tools/converter.py | 3 - .../tools/converter_tool/transformer.py | 10 - mace/python/tools/encrypt_opencl_codegen.py | 4 +- mace/python/tools/model.jinja2 | 9 - mace/test/mace_api_mt_test.cc | 13 +- mace/test/mace_api_test.cc | 13 +- .../opencl-kernel/opencl_kernel_configure.bzl | 78 ++-- tools/bazel.rc | 1 + tools/converter.py | 2 +- tools/sh_commands.py | 4 +- 318 files changed, 2118 insertions(+), 2946 deletions(-) delete mode 100644 mace/core/op_def_registry.cc delete mode 100644 
mace/core/op_def_registry.h delete mode 100644 mace/core/registry.h delete mode 100644 mace/kernels/BUILD delete mode 100644 mace/kernels/matmul_benchmark.cc rename mace/{kernels => ops}/activation.cc (93%) rename mace/{kernels => ops}/activation.h (95%) rename mace/{kernels => ops}/addn.cc (98%) rename mace/{kernels => ops}/argmax.cc (98%) rename mace/{kernels => ops}/arm/activation_neon.cc (95%) rename mace/{kernels => ops}/arm/activation_neon.h (82%) rename mace/{kernels => ops}/arm/conv_2d_neon.h (95%) rename mace/{kernels => ops}/arm/conv_2d_neon_15x1.cc (98%) rename mace/{kernels => ops}/arm/conv_2d_neon_1x1.cc (94%) rename mace/{kernels => ops}/arm/conv_2d_neon_1x15.cc (98%) rename mace/{kernels => ops}/arm/conv_2d_neon_1x7.cc (99%) rename mace/{kernels => ops}/arm/conv_2d_neon_3x3.cc (99%) rename mace/{kernels => ops}/arm/conv_2d_neon_5x5.cc (99%) rename mace/{kernels => ops}/arm/conv_2d_neon_7x1.cc (99%) rename mace/{kernels => ops}/arm/conv_2d_neon_7x7.cc (99%) rename mace/{kernels => ops}/arm/conv_winograd.cc (99%) rename mace/{kernels => ops}/arm/conv_winograd.h (92%) rename mace/{kernels => ops}/arm/conv_winograd_test.cc (91%) rename mace/{kernels => ops}/arm/deconv_2d_neon.h (93%) rename mace/{kernels => ops}/arm/deconv_2d_neon_3x3.cc (99%) rename mace/{kernels => ops}/arm/deconv_2d_neon_4x4.cc (99%) rename mace/{kernels => ops}/arm/depthwise_conv2d_neon.h (90%) rename mace/{kernels => ops}/arm/depthwise_conv2d_neon_3x3.cc (99%) rename mace/{kernels => ops}/batch_norm.cc (96%) rename mace/{kernels => ops}/batch_to_space.cc (99%) rename mace/{kernels => ops}/bias_add.cc (97%) rename mace/{kernels => ops}/buffer_inverse_transform.cc (85%) rename mace/{kernels => ops}/buffer_transform.cc (84%) rename mace/{kernels => ops}/cast.cc (97%) rename mace/{kernels => ops}/channel_shuffle.cc (97%) rename mace/{kernels => ops}/concat.cc (97%) rename mace/{kernels => ops}/conv_2d.cc (98%) rename mace/{kernels => ops}/conv_pool_2d_base.h (85%) rename mace/{kernels 
=> ops}/conv_pool_2d_util.cc (99%) rename mace/{kernels => ops}/conv_pool_2d_util.h (96%) rename mace/{kernels => ops}/crop.cc (98%) rename mace/{kernels => ops}/deconv_2d.cc (97%) rename mace/{kernels => ops}/deconv_2d.h (82%) rename mace/{kernels => ops}/depth_to_space.cc (97%) rename mace/{kernels => ops}/depthwise_conv2d.cc (98%) rename mace/{kernels => ops}/eltwise.cc (98%) rename mace/{kernels => ops}/eltwise.h (86%) rename mace/{kernels => ops}/expand_dims.cc (98%) rename mace/{kernels => ops}/fill.cc (97%) rename mace/{kernels => ops}/fixpoint.h (92%) rename mace/{kernels => ops}/fixpoint_test.cc (94%) rename mace/{kernels => ops}/fully_connected.cc (96%) rename mace/{kernels => ops}/gather.cc (98%) rename mace/{kernels => ops}/gemm.cc (99%) rename mace/{kernels => ops}/gemm.h (94%) rename mace/{kernels => ops}/gemm_test.cc (84%) rename mace/{kernels => ops}/gemmlowp_util.h (96%) rename mace/{kernels => ops}/identity.cc (97%) rename mace/{kernels => ops}/infer_conv2d_shape.cc (97%) rename mace/{kernels => ops}/local_response_norm.cc (98%) rename mace/{kernels => ops}/lstm_cell.cc (95%) rename mace/{kernels => ops}/matmul.cc (98%) rename mace/{kernels => ops}/memory_benchmark.cc (98%) rename mace/{kernels => ops}/opencl/activation.h (85%) rename mace/{kernels => ops}/opencl/addn.h (86%) rename mace/{kernels => ops}/opencl/batch_norm.h (85%) rename mace/{kernels => ops}/opencl/batch_to_space.h (86%) rename mace/{kernels => ops}/opencl/bias_add.h (85%) rename mace/{kernels => ops}/opencl/buffer/buffer_inverse_transform.h (85%) rename mace/{kernels => ops}/opencl/buffer/buffer_transform.cc (98%) rename mace/{kernels => ops}/opencl/buffer/buffer_transform.h (90%) rename mace/{kernels => ops}/opencl/buffer/buffer_type_transform.cc (97%) rename mace/{kernels => ops}/opencl/buffer/conv_2d.h (95%) rename mace/{kernels => ops}/opencl/buffer/conv_2d_1x1.cc (97%) rename mace/{kernels => ops}/opencl/buffer/conv_2d_general.cc (97%) rename mace/{kernels => 
ops}/opencl/buffer/depthwise_conv2d.cc (98%) rename mace/{kernels => ops}/opencl/buffer/depthwise_conv2d.h (94%) rename mace/{kernels => ops}/opencl/buffer/pooling.h (95%) rename mace/{kernels => ops}/opencl/buffer/softmax.h (93%) rename mace/{kernels => ops}/opencl/buffer/utils.cc (96%) rename mace/{kernels => ops}/opencl/buffer/utils.h (86%) rename mace/{kernels => ops}/opencl/buffer_inverse_transform.h (81%) rename mace/{kernels => ops}/opencl/buffer_transform.h (82%) rename mace/{kernels => ops}/opencl/channel_shuffle.h (83%) rename mace/{kernels => ops}/opencl/cl/activation.cl (100%) rename mace/{kernels => ops}/opencl/cl/addn.cl (100%) rename mace/{kernels => ops}/opencl/cl/batch_norm.cl (100%) rename mace/{kernels => ops}/opencl/cl/batch_to_space.cl (100%) rename mace/{kernels => ops}/opencl/cl/bias_add.cl (100%) rename mace/{kernels => ops}/opencl/cl/buffer_to_image.cl (100%) rename mace/{kernels => ops}/opencl/cl/buffer_transform.cl (100%) rename mace/{kernels => ops}/opencl/cl/channel_shuffle.cl (100%) rename mace/{kernels => ops}/opencl/cl/common.h (96%) rename mace/{kernels => ops}/opencl/cl/concat.cl (100%) rename mace/{kernels => ops}/opencl/cl/conv_2d.cl (100%) rename mace/{kernels => ops}/opencl/cl/conv_2d_1x1.cl (100%) rename mace/{kernels => ops}/opencl/cl/conv_2d_1x1_buffer.cl (100%) rename mace/{kernels => ops}/opencl/cl/conv_2d_3x3.cl (100%) rename mace/{kernels => ops}/opencl/cl/conv_2d_buffer.cl (100%) rename mace/{kernels => ops}/opencl/cl/crop.cl (100%) rename mace/{kernels => ops}/opencl/cl/deconv_2d.cl (100%) rename mace/{kernels => ops}/opencl/cl/depth_to_space.cl (100%) rename mace/{kernels => ops}/opencl/cl/depthwise_conv2d.cl (100%) rename mace/{kernels => ops}/opencl/cl/depthwise_conv2d_buffer.cl (100%) rename mace/{kernels => ops}/opencl/cl/eltwise.cl (100%) rename mace/{kernels => ops}/opencl/cl/fully_connected.cl (100%) rename mace/{kernels => ops}/opencl/cl/lstmcell.cl (100%) rename mace/{kernels => ops}/opencl/cl/matmul.cl 
(100%) rename mace/{kernels => ops}/opencl/cl/pad.cl (100%) rename mace/{kernels => ops}/opencl/cl/pooling.cl (100%) rename mace/{kernels => ops}/opencl/cl/pooling_buffer.cl (100%) rename mace/{kernels => ops}/opencl/cl/reduce_mean.cl (100%) rename mace/{kernels => ops}/opencl/cl/resize_bicubic.cl (100%) rename mace/{kernels => ops}/opencl/cl/resize_bilinear.cl (100%) rename mace/{kernels => ops}/opencl/cl/softmax.cl (100%) rename mace/{kernels => ops}/opencl/cl/softmax_buffer.cl (100%) rename mace/{kernels => ops}/opencl/cl/space_to_batch.cl (100%) rename mace/{kernels => ops}/opencl/cl/space_to_depth.cl (100%) rename mace/{kernels => ops}/opencl/cl/split.cl (100%) rename mace/{kernels => ops}/opencl/cl/sqrdiff_mean.cl (100%) rename mace/{kernels => ops}/opencl/cl/winograd_transform.cl (100%) rename mace/{kernels => ops}/opencl/common.h (84%) rename mace/{kernels => ops}/opencl/concat.h (86%) rename mace/{kernels => ops}/opencl/conv_2d.h (82%) rename mace/{kernels => ops}/opencl/crop.h (86%) rename mace/{kernels => ops}/opencl/deconv_2d.h (84%) rename mace/{kernels => ops}/opencl/depth_to_space.h (83%) rename mace/{kernels => ops}/opencl/depthwise_conv2d.h (81%) rename mace/{kernels => ops}/opencl/eltwise.h (85%) rename mace/{kernels => ops}/opencl/fully_connected.h (82%) rename mace/{kernels => ops}/opencl/helper.cc (99%) rename mace/{kernels => ops}/opencl/helper.h (85%) rename mace/{kernels => ops}/opencl/image/activation.h (93%) rename mace/{kernels => ops}/opencl/image/addn.h (94%) rename mace/{kernels => ops}/opencl/image/batch_norm.h (94%) rename mace/{kernels => ops}/opencl/image/batch_to_space.h (93%) rename mace/{kernels => ops}/opencl/image/bias_add.h (93%) rename mace/{kernels => ops}/opencl/image/buffer_to_image.h (95%) rename mace/{kernels => ops}/opencl/image/channel_shuffle.h (93%) rename mace/{kernels => ops}/opencl/image/concat.cc (98%) rename mace/{kernels => ops}/opencl/image/concat.h (93%) rename mace/{kernels => ops}/opencl/image/conv_2d.h 
(95%) rename mace/{kernels => ops}/opencl/image/conv_2d_1x1.cc (98%) rename mace/{kernels => ops}/opencl/image/conv_2d_3x3.cc (98%) rename mace/{kernels => ops}/opencl/image/conv_2d_general.cc (98%) rename mace/{kernels => ops}/opencl/image/crop.h (96%) rename mace/{kernels => ops}/opencl/image/deconv_2d.h (96%) rename mace/{kernels => ops}/opencl/image/depth_to_space.h (94%) rename mace/{kernels => ops}/opencl/image/depthwise_conv2d.cc (98%) rename mace/{kernels => ops}/opencl/image/depthwise_conv2d.h (92%) rename mace/{kernels => ops}/opencl/image/eltwise.h (95%) rename mace/{kernels => ops}/opencl/image/fully_connected.h (95%) rename mace/{kernels => ops}/opencl/image/image_to_buffer.h (95%) rename mace/{kernels => ops}/opencl/image/lstm_cell.h (94%) rename mace/{kernels => ops}/opencl/image/matmul.h (93%) rename mace/{kernels => ops}/opencl/image/pad.h (94%) rename mace/{kernels => ops}/opencl/image/pooling.h (95%) rename mace/{kernels => ops}/opencl/image/reduce_mean.h (95%) rename mace/{kernels => ops}/opencl/image/resize_bicubic.h (91%) rename mace/{kernels => ops}/opencl/image/resize_bilinear.h (92%) rename mace/{kernels => ops}/opencl/image/softmax.h (94%) rename mace/{kernels => ops}/opencl/image/space_to_batch.h (93%) rename mace/{kernels => ops}/opencl/image/space_to_depth.h (94%) rename mace/{kernels => ops}/opencl/image/split.h (95%) rename mace/{kernels => ops}/opencl/image/sqrdiff_mean.h (95%) rename mace/{kernels => ops}/opencl/image/winograd_transform.h (96%) rename mace/{kernels => ops}/opencl/lstm_cell.h (86%) rename mace/{kernels => ops}/opencl/matmul.h (86%) rename mace/{kernels => ops}/opencl/out_of_range_check_test.cc (98%) rename mace/{kernels => ops}/opencl/pad.h (86%) rename mace/{kernels => ops}/opencl/pooling.h (82%) rename mace/{kernels => ops}/opencl/reduce_mean.h (84%) rename mace/{kernels => ops}/opencl/resize_bicubic.h (84%) rename mace/{kernels => ops}/opencl/resize_bilinear.h (84%) rename mace/{kernels => ops}/opencl/softmax.h 
(85%) rename mace/{kernels => ops}/opencl/space_to_batch.h (86%) rename mace/{kernels => ops}/opencl/space_to_depth.h (83%) rename mace/{kernels => ops}/opencl/split.h (86%) rename mace/{kernels => ops}/opencl/sqrdiff_mean.h (84%) rename mace/{kernels => ops}/opencl/winograd_transform.h (86%) delete mode 100644 mace/ops/ops_def_register.cc delete mode 100644 mace/ops/ops_def_register.h rename mace/{kernels/ops_register.cc => ops/ops_registry.cc} (67%) rename mace/{kernels/ops_register.h => ops/ops_registry.h} (87%) rename mace/{kernels => ops}/pad.cc (98%) rename mace/{kernels => ops}/pooling.cc (98%) rename mace/{kernels => ops}/pooling.h (87%) rename mace/{kernels => ops}/quantize.cc (98%) rename mace/{kernels => ops}/reduce_mean.cc (98%) rename mace/{kernels => ops}/reshape.cc (98%) rename mace/{kernels => ops}/resize_bicubic.cc (98%) rename mace/{kernels => ops}/resize_bicubic.h (87%) rename mace/{kernels => ops}/resize_bilinear.cc (98%) rename mace/{kernels => ops}/resize_bilinear.h (86%) rename mace/{kernels => ops}/reverse.cc (98%) rename mace/{kernels => ops}/scalar_math.cc (95%) rename mace/{kernels => ops}/sgemm.cc (99%) rename mace/{kernels => ops}/sgemm.h (96%) rename mace/{kernels => ops}/sgemm_pack_test.cc (98%) rename mace/{kernels => ops}/shape.cc (98%) rename mace/{kernels => ops}/softmax.cc (98%) rename mace/{kernels => ops}/space_to_batch.cc (99%) rename mace/{kernels => ops}/space_to_depth.cc (97%) rename mace/{kernels => ops}/split.cc (98%) rename mace/{kernels => ops}/sqrdiff_mean.cc (97%) rename mace/{kernels => ops}/squeeze.cc (97%) rename mace/{kernels => ops}/stack.cc (98%) rename mace/{kernels => ops}/strided_slice.cc (99%) rename mace/{kernels => ops}/transpose.cc (99%) rename mace/{kernels => ops}/unstack.cc (98%) rename mace/{kernels => ops}/winograd_transform.cc (93%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 65623c78..d7fc2ec5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -67,7 +67,6 @@ extra_tests: stage: extra_tests 
script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS platform_compatible_tests: diff --git a/.travis.yml b/.travis.yml index 19a336d7..b6354913 100644 --- a/.travis.yml +++ b/.travis.yml @@ -94,7 +94,6 @@ jobs: - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - echo 'Extra Test' - - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1 env: TYPE=Extra-Test-ARMEABI-v7a os: linux @@ -106,7 +105,6 @@ jobs: - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=arm64-v8a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=arm64-v8a || exit 1 - echo 'Extra Test on ARM64' - - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=False --target_abis=arm64-v8a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=arm64-v8a || exit 1 env: TYPE=Extra-Test-ARM64-v8a os: linux diff --git a/docs/development/adding_a_new_op.md b/docs/development/adding_a_new_op.md index 33a1a60d..3e461671 100644 --- a/docs/development/adding_a_new_op.md +++ b/docs/development/adding_a_new_op.md @@ -5,46 +5,24 @@ You can 
create a custom op if it is not supported yet. To add a custom op, you need to follow these steps: -Register the new OpDef information ----------------------------------- -Register the OpDef information about which devices the operation could run on. -Registry file is in `mace/ops/ops_def_register.cc` -```c++ -#include "mace/ops/ops_def_register.h" - -namespace mace { -namespace ops { - -void RegisterOpDefs(OpDefRegistryBase *op_def_registry) { - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("MyCustomOp") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - ...... -} -} // namespace ops -} // namespace mace - -``` - Implement the Operation ----------------------- -The Best way is to refer to the implementation of other operator(e.g. `/mace/kernels/activation.cc`) - -Define the new Op class in `mace/kernels/my_custom_op.cc`. -1. CPU code: just write the code in `mace/kernels/my_custom_op.cc`. -2. GPU code: Kernel API is defined in `mace/kernels/my_custom_op.h`, -Kernel based on Image is realized in `mace/kernels/opencl/image/my_custom_op.cc`, -Kernel based on Buffer is realized in `mace/kernels/opencl/buffer/my_custom_op.cc`. +The Best way is to refer to the implementation of other operator(e.g. `/mace/ops/activation.cc`) + +Define the new Op class in `mace/ops/my_custom_op.cc`. +1. ARM kernels: Kernel about NEON is located at `mace/ops/arm/my_custom_op.cc` +2. GPU kernels: OpenCL kernel API is defined in `mace/ops/opencl/my_custom_op.h`, + * Kernel based on Image is realized in `mace/ops/opencl/image/my_custom_op.cc`, + * Kernel based on Buffer is realized in `mace/ops/opencl/buffer/my_custom_op.cc`. + * OpenCL kernel file is realized in `mace/ops/opencl/cl/my_custom_op.cl`. + * Add the path of opencl kernel file in file `mace/repository/opencl-kernel/opencl_kernel_configure.bzl` -The structure like the following code. +The structure of Op is like the following code. 
```c++ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class MyCustomOp; @@ -56,43 +34,34 @@ class MyCustomOp : public Operation { #ifdef MACE_ENABLE_OPENCL template -class ActivationOp : public Operation { +class MyCustomOp : public Operation { ... }; #endif // MACE_ENABLE_OPENCL -} // namespace ops -} // namespace mace - -``` - -Register the Operation ------------------------ -1, Add register function in `mace/kernels/my_custom_op.cc` -```c++ -#include "mace/core/operator.h" - -namespace mace { -namespace kernels { - void RegisterMyCustomOp(OpRegistryBase *op_registry) { - MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, + MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp, DeviceType::CPU, float); #ifdef MACE_ENABLE_OPENCL - MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, + MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp, DeviceType::GPU, float); - MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, + MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp, DeviceType::GPU, half); #endif // MACE_ENABLE_OPENCL } + } // namespace ops } // namespace mace + ``` -2, And then register the new Op in `mace/kernels/ops_register.cc`. + +Register the Operation +----------------------- +Register the new Op in `mace/ops/ops_registry.cc`. ``` -#include "mace/kernels/ops_register.h" +#include "mace/ops/ops_registry.h" namespace mace { namespace ops { diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index e6284be9..13c3c211 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -112,7 +112,8 @@ There are two common advanced use cases: - converting model to C++ code. - tuning GPU kernels for a specific SoC. -* **Convert model(s) to C++ code** +Convert model(s) to C++ code +-------------------------------- * **1. Change the model deployment file(.yml)** @@ -204,7 +205,8 @@ There are two common advanced use cases: // ... 
Same with the code in basic usage -* **Tuning for specific SoC's GPU** +Tuning for specific SoC's GPU +--------------------------------- If you want to use the GPU of a specific device, you can just specify the ``target_socs`` in your YAML file and then tune the MACE lib for it (OpenCL kernels), which may get 1~10% performance improvement. @@ -375,25 +377,52 @@ Use ``-h`` to get detailed help. Reduce Library Size ------------------- -* **dynamic library** +* Build for your own usage purpose. + * **dynamic library** - The generated dynamic library by script ``tools/build-standalone-lib.sh`` is about ``1.6M`` for - ``armeabi-v7a`` and ``2.1M`` for ``arm64-v8a``. It can be reduced by modifying some build options. + - If the models don't need to run on device ``dsp``, change the build option ``--define hexagon=true`` + to ``false``. The library size will then decrease by about ``100KB``. - - If the models don't need to run on device ``dsp``, change the build option ``--define hexagon=true`` - to ``false``. And the library will be decreased about ``100KB``. + - Furthermore, if only the ``cpu`` device is needed, change ``--define opencl=true`` to ``false``. This + will reduce the library size by half, to about ``700KB`` for ``armeabi-v7a`` and ``1000KB`` for ``arm64-v8a`` - - Futher more, if only ``cpu`` device needed, change ``--define opencl=true`` to ``false``. This way - will reduce half of library size to about ``700KB`` for ``armeabi-v7a`` and ``1000KB`` for ``arm64-v8a`` + - About ``300KB`` can be saved by adding the ``--config symbol_hidden`` build option. It will change + the visibility of inner apis in libmace.so and lead to linking errors when loading model(s) in ``code`` mode, + but has no effect on ``file`` mode. - - About ``300KB`` can be reduced when add ``--config symbol_hidden`` building option. It will change - the visibility of inner apis in libmace.so and lead to linking error when load model(s) in ``code`` - but no effection for ``file`` mode. 
+ * **static library** -* **static library** + - The methods in dynamic library can be useful for static library too. In addition, the static + library may also contain the model graph and model data if the configs ``model_graph_format`` and + ``model_data_format`` in deployment file are set to ``code``. - - The methods in dynamic library can be useful for static library too. In additional, the static - library may also contain model graph and model datas if the configs ``model_graph_format`` and - ``model_data_format`` in deployment file are set to ``code``. + - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable. - - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable. +* Remove the unused ops. +Remove the registration of the ops that your models do not use from ``mace/ops/ops_registry.cc``, +which will reduce the library size significantly. The final binary links only the registered ops' code. +``` +#include "mace/ops/ops_registry.h" + +namespace mace { +namespace ops { +// Just leave the ops used in your models + +... + +} // namespace ops + + +OpRegistry::OpRegistry() : OpRegistryBase() { +// Just leave the ops used in your models + + ... + + ops::RegisterMyCustomOp(this); + + ... 
+ +} + +} // namespace mace +``` diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 51f04741..d1101413 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -23,9 +23,10 @@ #include #include "mace/core/macros.h" -#include "mace/core/registry.h" #include "mace/core/types.h" #include "mace/core/runtime_failure_mock.h" +#include "mace/public/mace.h" +#include "mace/utils/logging.h" namespace mace { diff --git a/mace/core/net.cc b/mace/core/net.cc index 757b4831..63ca5792 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -27,8 +27,7 @@ namespace mace { -SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, - const OpRegistryBase *op_registry, +SerialNet::SerialNet(const OpRegistryBase *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, @@ -41,15 +40,7 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, target_device->cpu_runtime()->policy(), target_device->cpu_runtime()->use_gemmlowp())) { MACE_LATENCY_LOGGER(1, "Constructing SerialNet"); - // Register Operations - MaceStatus status; - for (int idx = 0; idx < net_def->op_types_size(); ++idx) { - status = op_def_registry->Register(net_def->op_types(idx)); - MACE_CHECK(status == MaceStatus::MACE_SUCCESS, status.information()); - } // Create Operations - operators_.clear(); - const OpRegistrationInfo *info; DeviceType target_device_type = target_device_->device_type(); OpConstructContext construct_context(ws_); for (int idx = 0; idx < net_def->op_size(); ++idx) { @@ -59,16 +50,13 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, ProtoArgHelper::GetOptionalArg( operator_def, "device", static_cast(target_device_type)); if (op_device == target_device_type) { - // Find op registration information - status = op_def_registry->Find(operator_def.type(), &info); - MACE_CHECK(status == MaceStatus::MACE_SUCCESS, status.information()); // Get available devices (sorted based on priority) OperatorDef temp_def(operator_def); - auto available_devices = 
info->device_place_func_(); + auto available_devices = op_registry->AvailableDevices(temp_def.type()); // Find the device type to run the op. // If the target_device_type in available devices, use target_device_type, - // otherwise, fallback to the first device (top priority). - DeviceType device_type = available_devices[0]; + // otherwise, fallback to CPU device. + DeviceType device_type = DeviceType::CPU; construct_context.set_device(cpu_device_); for (auto device : available_devices) { if (device == target_device_type) { diff --git a/mace/core/net.h b/mace/core/net.h index 799e07d4..d5a6725f 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -21,8 +21,6 @@ #include #include -#include "mace/core/op_def_registry.h" - #include "mace/core/operator.h" namespace mace { @@ -45,8 +43,7 @@ class NetBase { class SerialNet : public NetBase { public: - SerialNet(OpDefRegistryBase *op_def_registry, - const OpRegistryBase *op_registry, + SerialNet(const OpRegistryBase *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, diff --git a/mace/core/op_def_registry.cc b/mace/core/op_def_registry.cc deleted file mode 100644 index 7bb8de9e..00000000 --- a/mace/core/op_def_registry.cc +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "mace/core/op_def_registry.h" -#include "mace/utils/logging.h" - -namespace mace { - -void AddOpRegistrar(OpDefRegistryBase *registry, - const OpRegistrationBuilder &builder) { - registry->AddRegistrar( - builder.name(), - [builder](OpRegistrationInfo *info){ - builder.Finalize(info); - }); -} - -OpRegistrationBuilder::OpRegistrationBuilder(const std::string name) - : name_(name) {} - -const std::string OpRegistrationBuilder::name() const { return name_; } - -OpRegistrationBuilder &OpRegistrationBuilder::SetDevicePlaceFunc( - std::vector (*func)()) { - info_.device_place_func_ = func; - return *this; -} - -void OpRegistrationBuilder::Finalize(OpRegistrationInfo *info) const { - *info = info_; -} - -void OpDefRegistryBase::AddRegistrar(const std::string name, - const OpRegistrar ®istrar) { - registrar_.emplace(name, registrar); -} - -MaceStatus OpDefRegistryBase::Register(const std::string &name) { - VLOG(3) << "Registering operation definition: " << name; - if (registry_.find(name) != registry_.end()) { - return MaceStatus::MACE_SUCCESS; - } - auto iter = registrar_.find(name); - if (iter == registrar_.end()) { - return MaceStatus(MaceStatus::MACE_INVALID_ARGS, - "MACE do not support the operation: " + name); - } - registry_.emplace( - name, std::unique_ptr(new OpRegistrationInfo())); - iter->second(registry_[name].get()); - return MaceStatus::MACE_SUCCESS; -} - -MaceStatus OpDefRegistryBase::Find(const std::string &name, - const OpRegistrationInfo **info) { - auto iter = registry_.find(name); - if (iter == registry_.end()) { - *info = nullptr; - return MaceStatus(MaceStatus::MACE_INVALID_ARGS, - "Mace do not support the operation: " + name); - } - *info = iter->second.get(); - return MaceStatus::MACE_SUCCESS; -} - -} // namespace mace diff --git a/mace/core/op_def_registry.h b/mace/core/op_def_registry.h deleted file mode 100644 index 8e015658..00000000 --- a/mace/core/op_def_registry.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. 
All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_CORE_OP_DEF_REGISTRY_H_ -#define MACE_CORE_OP_DEF_REGISTRY_H_ - -#include -#include -#include -#include -#include - -#include "mace/proto/mace.pb.h" -#include "mace/public/mace.h" -#include "mace/utils/utils.h" - -namespace mace { - -// Device placement function -typedef std::function()> DevicePlaceFunc; - -struct OpRegistrationInfo { - OpRegistrationInfo() = default; - explicit OpRegistrationInfo(const DevicePlaceFunc &func) - : device_place_func_(func) {} - - DevicePlaceFunc device_place_func_; -}; - -class OpRegistrationBuilder { - public: - explicit OpRegistrationBuilder(const std::string name); - - const std::string name() const; - - OpRegistrationBuilder &SetDevicePlaceFunc( - std::vector (*func)()); - - void Finalize(OpRegistrationInfo *info) const; - private: - std::string name_; - OpRegistrationInfo info_; -}; - -class OpDefRegistryBase { - public: - typedef std::function OpRegistrar; - OpDefRegistryBase() = default; - virtual ~OpDefRegistryBase() = default; - void AddRegistrar(const std::string name, const OpRegistrar ®istrar); - MaceStatus Register(const std::string &name); - MaceStatus Find(const std::string &name, const OpRegistrationInfo **info); - - private: - std::unordered_map registrar_; - std::unordered_map< - std::string, - std::unique_ptr> registry_; - MACE_DISABLE_COPY_AND_ASSIGN(OpDefRegistryBase); -}; - -void AddOpRegistrar(OpDefRegistryBase 
*registry, - const OpRegistrationBuilder &builder); - -#define MACE_REGISTER_OP_DEF(op_def_registry, builder) \ - AddOpRegistrar(op_def_registry, builder) - -} // namespace mace - -#endif // MACE_CORE_OP_DEF_REGISTRY_H_ diff --git a/mace/core/operator.cc b/mace/core/operator.cc index d29c84e3..9a1da4c8 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include @@ -79,7 +80,26 @@ MaceStatus Operation::Init(OpInitContext *context) { return MaceStatus::MACE_SUCCESS; } -OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {} +// op registry +namespace { +class OpKeyBuilder { + public: + explicit OpKeyBuilder(const std::string &op_name); + + OpKeyBuilder &Device(DeviceType device); + + OpKeyBuilder &TypeConstraint(const char *attr_name, + DataType allowed); + + const std::string Build(); + + private: + std::string op_name_; + DeviceType device_type_; + std::map type_constraint_; +}; + +OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {} OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { device_type_ = device; @@ -103,16 +123,53 @@ const std::string OpKeyBuilder::Build() { return ss.str(); } +} // namespace + +void OpRegistrationInfo::AddDevice(mace::DeviceType device) { + devices.insert(device); +} + +void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) { + VLOG(3) << "Registering: " << key; + MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key); + creators[key] = creator; +} + +MaceStatus OpRegistryBase::Register(const std::string &op_type, + const mace::DeviceType device_type, + const mace::DataType dt, + mace::OpRegistrationInfo::OpCreator creator) { + if (registry_.count(op_type) == 0) { + registry_[op_type] = std::unique_ptr( + new OpRegistrationInfo); + } + registry_[op_type]->AddDevice(device_type); + + std::string op_key = OpKeyBuilder(op_type) + .Device(device_type) + 
.TypeConstraint("T", dt) + .Build(); + registry_.at(op_type)->Register(op_key, creator); + return MaceStatus::MACE_SUCCESS; +} + +const std::set OpRegistryBase::AvailableDevices( + const std::string &op_type) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + return registry_.at(op_type)->devices; +} -OpRegistryBase::~OpRegistryBase() = default; std::unique_ptr OpRegistryBase::CreateOperation( OpConstructContext *context, DeviceType device_type, const NetMode mode) const { OperatorDef *operator_def = context->operator_def(); - const int dtype = ProtoArgHelper::GetOptionalArg( - *operator_def, "T", static_cast(DT_FLOAT)); + const DataType dtype = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def, "T", static_cast(DT_FLOAT))); const int op_mode_i = ProtoArgHelper::GetOptionalArg( *operator_def, "mode", static_cast(NetMode::NORMAL)); const NetMode op_mode = static_cast(op_mode_i); @@ -120,15 +177,20 @@ std::unique_ptr OpRegistryBase::CreateOperation( << operator_def->type() << "<" << dtype << ">" << ") on " << device_type; if (op_mode == mode) { - return registry_.Create( - OpKeyBuilder(operator_def->type().data()) - .Device(device_type) - .TypeConstraint("T", static_cast(dtype)) - .Build(), - context); + const std::string op_type = context->operator_def()->type(); + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + std::string key = OpKeyBuilder(op_type) + .Device(device_type) + .TypeConstraint("T", dtype) + .Build(); + if (registry_.at(op_type)->creators.count(key) == 0) { + LOG(FATAL) << "Key not registered: " << key; + } + return registry_.at(op_type)->creators.at(key)(context); } else { return nullptr; } } - } // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h index 34de7e72..c354afbd 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -16,13 +16,13 @@ #define MACE_CORE_OPERATOR_H_ #include +#include #include +#include 
#include -#include #include "mace/core/arg_helper.h" #include "mace/core/op_context.h" -#include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" #include "mace/proto/mace.pb.h" @@ -160,62 +160,57 @@ class Operation { #define MACE_OP_OUTPUT_TAGS(first_input, ...) \ enum _OutputTags { first_input = 0, __VA_ARGS__ } -class OpKeyBuilder { - public: - explicit OpKeyBuilder(const char *op_name); - OpKeyBuilder &Device(DeviceType device); +struct OpRegistrationInfo { + public: + typedef std::function(OpConstructContext *)> + OpCreator; - OpKeyBuilder &TypeConstraint(const char *attr_name, - DataType allowed); + OpRegistrationInfo() = default; - template - OpKeyBuilder &TypeConstraint(const char *attr_name); + void AddDevice(DeviceType); - const std::string Build(); + void Register(const std::string &key, OpCreator creator); - private: - std::string op_name_; - DeviceType device_type_; - std::map type_constraint_; + std::set devices; + std::unordered_map creators; }; -template -OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) { - return this->TypeConstraint(attr_name, DataTypeToEnum::value); -} - class OpRegistryBase { public: - typedef Registry - RegistryType; OpRegistryBase() = default; - virtual ~OpRegistryBase(); - RegistryType *registry() { return ®istry_; } + virtual ~OpRegistryBase() = default; + MaceStatus Register(const std::string &op_type, + const DeviceType device_type, + const DataType dt, + OpRegistrationInfo::OpCreator creator); + + const std::set AvailableDevices( + const std::string &op_type) const; + std::unique_ptr CreateOperation( OpConstructContext *context, DeviceType device_type, const NetMode mode) const; + template + static std::unique_ptr DefaultCreator( + OpConstructContext *context) { + return std::unique_ptr(new DerivedType(context)); + } + private: - RegistryType registry_; + std::unordered_map< + std::string, + std::unique_ptr> registry_; MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase); }; 
-MACE_DECLARE_REGISTRY(OpRegistry, - Operation, - OpConstructContext *); - #define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ - MACE_REGISTER_CLASS(OpRegistry, \ - op_registry->registry(), \ - OpKeyBuilder(op_type) \ - .Device(device) \ - .TypeConstraint
("T") \ - .Build(), \ - class_name) + op_registry->Register(op_type, \ + device, \ + DataTypeToEnum
::value, \ + OpRegistryBase::DefaultCreator>) } // namespace mace diff --git a/mace/core/registry.h b/mace/core/registry.h deleted file mode 100644 index 1ad92f0a..00000000 --- a/mace/core/registry.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_CORE_REGISTRY_H_ -#define MACE_CORE_REGISTRY_H_ - -#include -#include -#include -#include // NOLINT(build/c++11) -#include -#include - -#include "mace/utils/logging.h" - -namespace mace { - -template -class Registry { - public: - typedef std::function(Args...)> Creator; - - Registry() : registry_() {} - - void Register(const SrcType &key, Creator creator) { - VLOG(3) << "Registering: " << key; - std::lock_guard lock(register_mutex_); - MACE_CHECK(registry_.count(key) == 0, "Key already registered: ", key); - registry_[key] = creator; - } - - std::unique_ptr Create(const SrcType &key, Args... args) const { - if (registry_.count(key) == 0) { - LOG(FATAL) << "Key not registered: " << key; - } - return registry_.at(key)(args...); - } - - private: - std::map registry_; - std::mutex register_mutex_; - - MACE_DISABLE_COPY_AND_ASSIGN(Registry); -}; - -template -class Registerer { - public: - Registerer(const SrcType &key, - Registry *registry, - typename Registry::Creator creator) { - registry->Register(key, creator); - } - - template - static std::unique_ptr DefaultCreator(Args... 
args) { - return std::unique_ptr(new DerivedType(args...)); - } -}; - -#define MACE_CONCATENATE_IMPL(s1, s2) s1##s2 -#define MACE_CONCATENATE(s1, s2) MACE_CONCATENATE_IMPL(s1, s2) -#ifdef __COUNTER__ -#define MACE_ANONYMOUS_VARIABLE(str) MACE_CONCATENATE(str, __COUNTER__) -#else -#define MACE_ANONYMOUS_VARIABLE(str) MACE_CONCATENATE(str, __LINE__) -#endif - -#define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ - typedef Registerer \ - Registerer##RegistryName; - -#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ - MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ - ##__VA_ARGS__) - -#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \ - Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(RegistryName)( \ - key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); - -#define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) \ - MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__) - -} // namespace mace - -#endif // MACE_CORE_REGISTRY_H_ diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 4c03f331..7cf01043 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -15,10 +15,11 @@ #ifndef MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_ +#include +#include +#include #include #include -#include -#include #include "mace/core/buffer.h" #include "mace/core/preallocated_pooled_allocator.h" diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD deleted file mode 100644 index 88528578..00000000 --- a/mace/kernels/BUILD +++ /dev/null @@ -1,150 +0,0 @@ -# Description: -# Mace neon kernels. 
-# -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) # Apache 2.0 - -load( - "//mace:mace.bzl", - "if_android", - "if_neon_enabled", - "if_openmp_enabled", - "if_android_armv7", - "if_hexagon_enabled", - "if_opencl_enabled", -) - -cc_library( - name = "kernels", - srcs = glob( - [ - "*.cc", - "arm/*.cc", - ], - exclude = [ - "*_test.cc", - "*_benchmark.cc", - "arm/*_test.cc", - "buffer_inverse_transform.cc", - "buffer_transform.cc", - "lstm_cell.cc", - "winograd_transform.cc", - ], - ) + if_opencl_enabled(glob( - [ - "opencl/*.cc", - "opencl/image/*.cc", - "opencl/buffer/*.cc", - "buffer_inverse_transform.cc", - "buffer_transform.cc", - "lstm_cell.cc", - "winograd_transform.cc", - ], - exclude = [ - "opencl/*_test.cc", - ], - )), - hdrs = glob( - [ - "*.h", - "arm/*.h", - ], - ) + if_opencl_enabled(glob([ - "opencl/*.h", - "opencl/image/*.h", - "opencl/buffer/*.h", - ])), - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_openmp_enabled([ - "-fopenmp", - ]) + if_neon_enabled([ - "-DMACE_ENABLE_NEON", - ]) + if_android_armv7([ - "-mfpu=neon", - ]) + if_android_armv7([ - "-mfloat-abi=softfp", - ]) + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled([ - "-DMACE_ENABLE_HEXAGON", - ]), - linkopts = if_android(["-lm"]), - deps = [ - "//mace/core", - "@gemmlowp", - "@tflite", - ], -) - -cc_test( - name = "kernels_test", - testonly = 1, - srcs = glob( - [ - "*_test.cc", - "arm/*_test.cc", - "opencl/*_test.cc", - ], - ), - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_openmp_enabled([ - "-fopenmp", - ]) + if_neon_enabled([ - "-DMACE_ENABLE_NEON", - ]) + if_android_armv7([ - "-mfpu=neon", - "-mfloat-abi=softfp", - ]) + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled([ - "-DMACE_ENABLE_HEXAGON", - ]), - linkopts = ["-fopenmp"], - linkstatic = 1, - deps = [ - ":kernels", - "//mace/ops", - "@gtest", - "@gtest//:gtest_main", - ], 
-) - -cc_test( - name = "kernels_benchmark", - testonly = 1, - srcs = glob(["*_benchmark.cc"]), - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_openmp_enabled([ - "-fopenmp", - ]) + if_neon_enabled([ - "-DMACE_ENABLE_NEON", - ]) + if_android_armv7([ - "-mfpu=neon", - "-mfloat-abi=softfp", - ]) + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled([ - "-DMACE_ENABLE_HEXAGON", - ]), - linkopts = ["-fopenmp"], - linkstatic = 1, - deps = [ - ":kernels", - "//mace/core:test_benchmark_main", - "//mace/ops", - "//third_party/eigen3", - "@gemmlowp", - ], -) diff --git a/mace/kernels/matmul_benchmark.cc b/mace/kernels/matmul_benchmark.cc deleted file mode 100644 index ef19bd6c..00000000 --- a/mace/kernels/matmul_benchmark.cc +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include - -#include "public/gemmlowp.h" -#include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/gemm.h" -#include "mace/kernels/sgemm.h" -#include "mace/ops/ops_test_util.h" - -namespace gemmlowp { - -template -class Matrix : public MatrixMap { - public: - typedef MatrixMap Map; - typedef MatrixMap ConstMap; - typedef typename Map::Scalar Scalar; - static const MapOrder Order = tOrder; - using Map::cols_; - using Map::data_; - using Map::kOrder; - using Map::rows_; - using Map::stride_; - - public: - Matrix() : Map(nullptr, 0, 0, 0) {} - - Matrix(int rows, int cols) : Map(nullptr, 0, 0, 0) { Resize(rows, cols); } - - Matrix(const Matrix &other) : Map(nullptr, 0, 0, 0) { *this = other; } - - Matrix &operator=(const Matrix &other) { - Resize(other.rows_, other.cols_); - std::memcpy(data_, other.data_, size() * sizeof(Scalar)); - return *this; - } - - friend bool operator==(const Matrix &a, const Matrix &b) { - return a.rows_ == b.rows_ && a.cols_ == b.cols_ && - !std::memcmp(a.data_, b.data_, a.size()); - } - - void Resize(int rows, int cols) { - rows_ = rows; - cols_ = cols; - stride_ = kOrder == gemmlowp::MapOrder::ColMajor ? 
rows : cols; - storage.resize(size()); - data_ = storage.data(); - } - - int size() const { return rows_ * cols_; } - - Map &map() { return *static_cast(this); } - - ConstMap const_map() const { return ConstMap(data_, rows_, cols_, stride_); } - - protected: - std::vector storage; -}; - -template -void MakeZero(MatrixType *m) { - for (int c = 0; c < m->cols(); c++) { - for (int r = 0; r < m->rows(); r++) { - (*m)(r, c) = 128; - } - } -} - -} // namespace gemmlowp - -namespace mace { -namespace kernels { -namespace test { - -// Test the speed of different access order of a NHWC buffer - -namespace { - -// Matmul with (m, k) x (k, n) -void MatmulBenchmark_Mace(int iters, int m, int k, int n) { - mace::testing::StopTiming(); - std::vector lhs(m * k); - std::vector rhs(k * n); - std::vector result(m * n); - // warm up - Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data()); - mace::testing::StartTiming(); - while (iters--) { - Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data()); - } -} - -void MatmulBenchmark_Mace_SGemm(int iters, int m, int k, int n) { - mace::testing::StopTiming(); - std::vector lhs(m * k); - std::vector rhs(k * n); - std::vector result(m * n); - - kernels::MatrixMap matrix_lhs(1, m, k, RowMajor, lhs.data(), - true); - kernels::MatrixMap matrix_rhs(1, k, n, RowMajor, rhs.data(), - true); - kernels::MatrixMap matrix_result(1, m, n, RowMajor, result.data()); - - kernels::SGemm sgemm; - - sgemm(matrix_lhs, matrix_rhs, &matrix_result); - - mace::testing::StartTiming(); - while (iters--) { - sgemm(matrix_lhs, matrix_rhs, &matrix_result); - } -} - -void MatmulBenchmark_Eigen(int iters, int m, int k, int n) { - mace::testing::StopTiming(); - Eigen::MatrixXf lhs = Eigen::MatrixXf::Random(m, k); - Eigen::MatrixXf rhs = Eigen::MatrixXf::Random(k, n); - Eigen::MatrixXf result = Eigen::MatrixXf::Zero(m, n); - // warm up - result = lhs * rhs; - mace::testing::StartTiming(); - while (iters--) { - result = lhs * rhs; - } -} - -void 
MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) { - mace::testing::StopTiming(); - - gemmlowp::Matrix lhs; - gemmlowp::Matrix rhs; - gemmlowp::Matrix result; - lhs.Resize(rows, depth); - rhs.Resize(depth, cols); - result.Resize(rows, cols); - gemmlowp::MakeZero(&lhs); - gemmlowp::MakeZero(&rhs); - gemmlowp::MakeZero(&result); - - gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage; - quantize_down_stage.result_offset_after_shift = 128; - quantize_down_stage.result_fixedpoint_multiplier = 1234567890; - quantize_down_stage.result_shift = 16; - gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; - const auto output_pipeline = - std::make_tuple(quantize_down_stage, saturating_cast_stage); - - auto gemm_context = - mace::ops::test::OpTestContext::Get() - ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); - MACE_CHECK_NOTNULL(gemm_context); - - using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; - - gemmlowp::GemmWithOutputPipeline( - gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, - -128, output_pipeline); - - mace::testing::StartTiming(); - while (iters--) { - gemmlowp::GemmWithOutputPipeline( - gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, - -128, output_pipeline); - } -} - -void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { - mace::testing::StopTiming(); - - gemmlowp::Matrix lhs; - gemmlowp::Matrix rhs; - gemmlowp::Matrix result; - lhs.Resize(rows, depth); - rhs.Resize(depth, cols); - result.Resize(rows, cols); - gemmlowp::MakeZero(&lhs); - gemmlowp::MakeZero(&rhs); - gemmlowp::MakeZero(&result); - - const auto output_pipeline = std::make_tuple(); - - auto gemm_context = - mace::ops::test::OpTestContext::Get() - ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); - MACE_CHECK_NOTNULL(gemm_context); - - using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; - - gemmlowp::GemmWithOutputPipeline( - 
gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, - -128, output_pipeline); - - mace::testing::StartTiming(); - while (iters--) { - gemmlowp::GemmWithOutputPipeline( - gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, - -128, output_pipeline); - } -} - -} // namespace - -#define MACE_BM_MATMUL_FUNC(M, K, N, FUNC, TYPE) \ - static void MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC(int iters) { \ - const int64_t macc = static_cast(iters) * M * K * N; \ - const int64_t tot = static_cast(iters) * (M + N) * K; \ - mace::testing::MaccProcessed(macc); \ - mace::testing::BytesProcessed(tot * sizeof(TYPE)); \ - MatmulBenchmark_##FUNC(iters, M, K, N); \ - } \ - MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC) - -#define MACE_BM_MATMUL(M, K, N) \ - MACE_BM_MATMUL_FUNC(M, K, N, Mace, float); \ - MACE_BM_MATMUL_FUNC(M, K, N, Mace_SGemm, float); \ - MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \ - MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \ - MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t); - -// Embedding size 384 -MACE_BM_MATMUL(7, 384, 384); -MACE_BM_MATMUL(7, 384, 1536); -MACE_BM_MATMUL(7, 1536, 384); - -MACE_BM_MATMUL(15, 384, 384); -MACE_BM_MATMUL(15, 384, 1536); -MACE_BM_MATMUL(15, 1536, 384); - -MACE_BM_MATMUL(1, 256, 256); -MACE_BM_MATMUL(1, 256, 1536); -MACE_BM_MATMUL(1, 1536, 256); -MACE_BM_MATMUL(256, 256, 1); -MACE_BM_MATMUL(1536, 256, 1); -MACE_BM_MATMUL(256, 1536, 1); -MACE_BM_MATMUL(29792, 256, 1); -MACE_BM_MATMUL(1, 256, 29792); -MACE_BM_MATMUL(2, 256, 256); -MACE_BM_MATMUL(2, 256, 1536); -MACE_BM_MATMUL(2, 1536, 256); -MACE_BM_MATMUL(3, 256, 256); -MACE_BM_MATMUL(3, 256, 1536); -MACE_BM_MATMUL(3, 1536, 256); -MACE_BM_MATMUL(4, 256, 256); -MACE_BM_MATMUL(4, 256, 1536); -MACE_BM_MATMUL(4, 1536, 256); -MACE_BM_MATMUL(8, 256, 256); -MACE_BM_MATMUL(8, 256, 1536); -MACE_BM_MATMUL(8, 1536, 256); -MACE_BM_MATMUL(10, 256, 256); -MACE_BM_MATMUL(10, 256, 1536); -MACE_BM_MATMUL(10, 1536, 256); -MACE_BM_MATMUL(15, 
256, 256); -MACE_BM_MATMUL(15, 256, 1536); -MACE_BM_MATMUL(15, 1536, 256); - -// Embedding size 128 -MACE_BM_MATMUL(1, 128, 1536); -MACE_BM_MATMUL(1, 128, 44678); - -// MobileNet -MACE_BM_MATMUL(128, 128, 3136); -MACE_BM_MATMUL(256, 256, 784); -MACE_BM_MATMUL(512, 512, 196); -MACE_BM_MATMUL(1024, 1024, 49); - -} // namespace test -} // namespace kernels -} // namespace mace diff --git a/mace/libmace/BUILD b/mace/libmace/BUILD index 4e887713..d227f259 100644 --- a/mace/libmace/BUILD +++ b/mace/libmace/BUILD @@ -40,7 +40,6 @@ cc_library( deps = [ "//mace/public", "//mace/ops", - "//mace/kernels", ], alwayslink = 1, ) @@ -79,7 +78,7 @@ genrule( srcs = [ "//mace/codegen:generated_version", "//mace/core", - "//mace/kernels", + "//mace/ops:internal_ops", "//mace/ops", "//mace/libmace", "//mace/utils", @@ -93,7 +92,7 @@ genrule( "mri_stream=$$(python $(location //mace/python/tools:archive_static_lib) " + "$(locations //mace/codegen:generated_version) " + "$(locations //mace/core:core) " + - "$(locations //mace/kernels:kernels) " + + "$(locations //mace/ops:internal_ops) " + "$(locations //mace/ops:ops) " + "$(locations //mace/libmace:libmace) " + "$(locations //mace/utils:utils) " + diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index a7494086..c4d65f7b 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -22,8 +22,7 @@ #include "mace/core/net.h" #include "mace/core/device_context.h" -#include "mace/kernels/ops_register.h" -#include "mace/ops/ops_def_register.h" +#include "mace/ops/ops_registry.h" #include "mace/public/mace.h" #ifdef MACE_ENABLE_OPENCL @@ -359,7 +358,6 @@ class MaceEngine::Impl { private: const unsigned char *model_data_; size_t model_data_size_; - std::unique_ptr op_def_registry_; std::unique_ptr op_registry_; DeviceType device_type_; std::unique_ptr device_; @@ -377,7 +375,6 @@ class MaceEngine::Impl { MaceEngine::Impl::Impl(const MaceEngineConfig &config) : model_data_(nullptr), model_data_size_(0), - op_def_registry_(new 
OpDefRegistry()), op_registry_(new OpRegistry), device_type_(config.impl_->device_type()), device_(nullptr), @@ -466,7 +463,6 @@ MaceStatus MaceEngine::Impl::Init( // Init model auto net = std::unique_ptr(new SerialNet( - op_def_registry_.get(), op_registry_.get(), net_def, ws_.get(), @@ -474,8 +470,7 @@ MaceStatus MaceEngine::Impl::Init( NetMode::INIT)); MACE_RETURN_IF_ERROR(net->Init()); MACE_RETURN_IF_ERROR(net->Run()); - net_ = std::unique_ptr(new SerialNet(op_def_registry_.get(), - op_registry_.get(), + net_ = std::unique_ptr(new SerialNet(op_registry_.get(), net_def, ws_.get(), device_.get())); diff --git a/mace/ops/BUILD b/mace/ops/BUILD index d039f8c8..bfdf85a5 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -1,5 +1,4 @@ # Description: -# Mace operators. # package( default_visibility = ["//visibility:public"], @@ -18,18 +17,58 @@ load( ) cc_library( - name = "test", - testonly = 1, - hdrs = glob([ - "*_test_util.h", - ]), - srcs = [ - "ops_test_util.cc", - ], + name = "internal_ops", + srcs = glob( + [ + "*.cc", + "arm/*.cc", + ], + exclude = [ + "*_test.cc", + "*_benchmark.cc", + "arm/*_test.cc", + "ops_registry.cc", + "ops_test_util.cc", + "buffer_inverse_transform.cc", + "buffer_transform.cc", + "lstm_cell.cc", + "winograd_transform.cc", + ], + ) + if_opencl_enabled(glob( + [ + "opencl/*.cc", + "opencl/image/*.cc", + "opencl/buffer/*.cc", + "buffer_inverse_transform.cc", + "buffer_transform.cc", + "lstm_cell.cc", + "winograd_transform.cc", + ], + exclude = [ + "opencl/*_test.cc", + ], + )), + hdrs = glob( + [ + "*.h", + "arm/*.h", + ], + exclude = [ + "ops_registry.h", + "ops_test_util.h", + ] + ) + if_opencl_enabled(glob([ + "opencl/*.h", + "opencl/image/*.h", + "opencl/buffer/*.h", + ])), copts = [ "-Werror", "-Wextra", - ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-Wno-missing-field-initializers", + ] + if_openmp_enabled([ + "-fopenmp", + ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ "-mfpu=neon", @@ -40,20 
+79,54 @@ cc_library( ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), + linkopts = if_android(["-lm"]), deps = [ - "ops", - "//mace/kernels", - "@gtest", + "//mace/core", + "@gemmlowp", + "@tflite", ], ) + cc_library( name = "ops", srcs = [ - "ops_def_register.cc", + "ops_registry.cc" ], hdrs = [ - "ops_def_register.h", + "ops_registry.h", + ], + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + if_openmp_enabled([ + "-fopenmp", + ]) + if_neon_enabled([ + "-DMACE_ENABLE_NEON", + ]) + if_android_armv7([ + "-mfpu=neon", + ]) + if_android_armv7([ + "-mfloat-abi=softfp", + ]) + if_opencl_enabled([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), + linkopts = if_android(["-lm"]), + deps = [ + "internal_ops", + ], +) + +cc_library( + name = "test", + testonly = 1, + hdrs = glob([ + "*_test_util.h", + ]), + srcs = [ + "ops_test_util.cc", ], copts = [ "-Werror", @@ -70,7 +143,8 @@ cc_library( "-DMACE_ENABLE_HEXAGON", ]), deps = [ - "//mace/core", + "ops", + "@gtest", ], ) @@ -78,16 +152,22 @@ cc_test( name = "ops_test", testonly = 1, srcs = glob( - ["*_test.cc"], + [ + "*_test.cc", + "arm/*_test.cc", + "opencl/*_test.cc", + ], ), copts = [ "-Werror", "-Wextra", - ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-Wno-missing-field-initializers", + ] + if_openmp_enabled([ + "-fopenmp", + ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ "-mfpu=neon", - ]) + if_android_armv7([ "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -97,8 +177,7 @@ cc_test( linkopts = ["-fopenmp"], linkstatic = 1, deps = [ - ":ops", - ":test", + "test", "@gtest//:gtest_main", ], ) @@ -110,11 +189,13 @@ cc_test( copts = [ "-Werror", "-Wextra", - ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-Wno-missing-field-initializers", + ] + if_openmp_enabled([ + "-fopenmp", + ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ "-mfpu=neon", - ]) + 
if_android_armv7([ "-mfloat-abi=softfp", ]) + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", @@ -124,8 +205,9 @@ cc_test( linkopts = ["-fopenmp"], linkstatic = 1, deps = [ - ":ops", - ":test", + "test", "//mace/core:test_benchmark_main", + "//third_party/eigen3", + "@gemmlowp", ], ) diff --git a/mace/kernels/activation.cc b/mace/ops/activation.cc similarity index 93% rename from mace/kernels/activation.cc rename to mace/ops/activation.cc index 038c4549..7c733177 100644 --- a/mace/kernels/activation.cc +++ b/mace/ops/activation.cc @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/activation.h" +#include "mace/ops/activation.h" #include #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/activation.h" +#include "mace/ops/opencl/image/activation.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class ActivationOp; @@ -33,7 +33,7 @@ class ActivationOp : public Operation { public: explicit ActivationOp(OpConstructContext *context) : Operation(context), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", @@ -74,7 +74,7 @@ class ActivationOp : public Operation { public: explicit ActivationOp(OpConstructContext *context) : Operation(context) { - ActivationType type = kernels::StringToActivationType( + ActivationType type = ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP")); auto relux_max_limit = static_cast( @@ -114,5 +114,5 @@ void RegisterActivation(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/activation.h b/mace/ops/activation.h similarity index 95% rename from mace/kernels/activation.h rename to 
mace/ops/activation.h index 12728465..2c9a1861 100644 --- a/mace/kernels/activation.h +++ b/mace/ops/activation.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_ACTIVATION_H_ -#define MACE_KERNELS_ACTIVATION_H_ +#ifndef MACE_OPS_ACTIVATION_H_ +#define MACE_OPS_ACTIVATION_H_ #include #include #include #include "mace/core/types.h" -#include "mace/kernels/arm/activation_neon.h" +#include "mace/ops/arm/activation_neon.h" #include "mace/utils/logging.h" namespace mace { -namespace kernels { +namespace ops { enum ActivationType { NOOP = 0, @@ -149,7 +149,7 @@ void PReLUActivation(const T *input_ptr, } } -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ACTIVATION_H_ +#endif // MACE_OPS_ACTIVATION_H_ diff --git a/mace/ops/activation_benchmark.cc b/mace/ops/activation_benchmark.cc index 1f16879f..bd766047 100644 --- a/mace/ops/activation_benchmark.cc +++ b/mace/ops/activation_benchmark.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -46,7 +45,7 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) { .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluBM") .Input("InputImage") @@ -108,7 +107,7 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluxBM") .Input("InputImage") @@ -186,9 +185,9 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) { .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { 
BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Alpha", "AlphaImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Activation", "PreluBM") .Input("InputImage") @@ -251,7 +250,7 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "TanhBM") .Input("InputImage") @@ -318,7 +317,7 @@ void SigmoidBenchmark( if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "SigmoidBM") .Input("InputImage") diff --git a/mace/ops/activation_test.cc b/mace/ops/activation_test.cc index 01735e97..f56a3a17 100644 --- a/mace/ops/activation_test.cc +++ b/mace/ops/activation_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -33,7 +32,7 @@ void TestSimpleRelu() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluTest") .Input("InputImage") @@ -46,7 +45,7 @@ void TestSimpleRelu() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "ReluTest") .Input("Input") @@ -81,7 +80,7 @@ void TestUnalignedSimpleRelu() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluTest") .Input("InputImage") @@ -94,7 +93,7 @@ void TestUnalignedSimpleRelu() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "ReluTest") .Input("Input") @@ -132,7 +131,7 @@ void TestSimpleRelux() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluxTest") .Input("InputImage") @@ -146,7 +145,7 @@ void TestSimpleRelux() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "ReluxTest") .Input("Input") @@ -182,7 +181,7 @@ void TestSimpleReluRelux() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "ReluxTest") .Input("InputImage") @@ -196,7 +195,7 @@ void TestSimpleReluRelux() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - 
kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "ReluxTest") .Input("Input") @@ -237,9 +236,9 @@ void TestSimplePrelu() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Alpha", "AlphaImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Activation", "PreluTest") .Input("InputImage") @@ -253,7 +252,7 @@ void TestSimplePrelu() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "PreluTest") .Input("Input") @@ -293,7 +292,7 @@ void TestSimpleTanh() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "TanhTest") .Input("InputImage") @@ -306,7 +305,7 @@ void TestSimpleTanh() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "TanhTest") .Input("Input") @@ -346,7 +345,7 @@ void TestSimpleSigmoid() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Activation", "SigmoidTest") .Input("InputImage") @@ -359,7 +358,7 @@ void TestSimpleSigmoid() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("Activation", "SigmoidTest") .Input("Input") diff --git a/mace/kernels/addn.cc b/mace/ops/addn.cc similarity index 98% rename from mace/kernels/addn.cc rename to mace/ops/addn.cc index 6634e8e8..4040de1f 100644 --- a/mace/kernels/addn.cc +++ b/mace/ops/addn.cc @@ -22,11 +22,11 @@ 
#include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/addn.h" +#include "mace/ops/opencl/image/addn.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { static constexpr int kCostPerGroup = 1024; @@ -142,5 +142,5 @@ void RegisterAddN(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index a155d854..5db2bda4 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -37,7 +36,7 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { for (int i = 0; i < inputs; ++i) { BufferToImage(&net, MakeString("Input", i).c_str(), MakeString("InputImage", i).c_str(), - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } OpDefBuilder op_def_builder("AddN", "AddNBM"); for (int i = 0; i < inputs; ++i) { diff --git a/mace/ops/addn_test.cc b/mace/ops/addn_test.cc index 5116e36b..865fdd7f 100644 --- a/mace/ops/addn_test.cc +++ b/mace/ops/addn_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -69,7 +68,7 @@ void SimpleAdd3() { for (int i = 0; i < input_num; ++i) { BufferToImage(&net, MakeString("Input", i), MakeString("InputImage", i), - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto op_def_cl = OpDefBuilder("AddN", "AddNTest"); @@ -84,7 +83,7 @@ void SimpleAdd3() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { OpDefBuilder("AddN", "AddNTest") .Input("Input0") @@ -143,7 +142,7 @@ void RandomTest() { for (int i = 0; i < input_num; ++i) { BufferToImage(&net, MakeString("Input", i), MakeString("InputImage", i), - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto op_def_cl = OpDefBuilder("AddN", "AddNTest"); @@ -158,7 +157,7 @@ void RandomTest() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); diff --git a/mace/kernels/argmax.cc b/mace/ops/argmax.cc similarity index 98% rename from mace/kernels/argmax.cc rename to mace/ops/argmax.cc index 19d52f7f..8f8419b7 100644 --- a/mace/kernels/argmax.cc +++ b/mace/ops/argmax.cc @@ -21,7 +21,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class ArgMaxOp : public Operation { @@ -84,5 +84,5 @@ void RegisterArgMax(OpRegistryBase *op_registry) { DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/argmax_test.cc b/mace/ops/argmax_test.cc index 06de7046..a0001ec3 100644 --- a/mace/ops/argmax_test.cc +++ b/mace/ops/argmax_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/arm/activation_neon.cc b/mace/ops/arm/activation_neon.cc similarity index 95% rename from mace/kernels/arm/activation_neon.cc rename to mace/ops/arm/activation_neon.cc index 6067077c..44b492a4 100644 --- a/mace/kernels/arm/activation_neon.cc +++ b/mace/ops/arm/activation_neon.cc @@ -17,10 +17,10 @@ #endif #include -#include "mace/kernels/arm/activation_neon.h" +#include "mace/ops/arm/activation_neon.h" namespace mace { -namespace kernels { +namespace ops { void ReluNeon(const float *input, const index_t size, float *output) { #if defined(MACE_ENABLE_NEON) @@ -67,5 +67,5 @@ void ReluxNeon(const float *input, const float limit, #endif } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/activation_neon.h b/mace/ops/arm/activation_neon.h similarity index 82% rename from mace/kernels/arm/activation_neon.h rename to mace/ops/arm/activation_neon.h index 886c95fe..cbd1974f 100644 --- a/mace/kernels/arm/activation_neon.h +++ b/mace/ops/arm/activation_neon.h @@ -12,20 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_ARM_ACTIVATION_NEON_H_ -#define MACE_KERNELS_ARM_ACTIVATION_NEON_H_ +#ifndef MACE_OPS_ARM_ACTIVATION_NEON_H_ +#define MACE_OPS_ARM_ACTIVATION_NEON_H_ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { void ReluNeon(const float *input, const index_t size, float *output); void ReluxNeon(const float *input, const float limit, const index_t size, float *output); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ARM_ACTIVATION_NEON_H_ +#endif // MACE_OPS_ARM_ACTIVATION_NEON_H_ diff --git a/mace/kernels/arm/conv_2d_neon.h b/mace/ops/arm/conv_2d_neon.h similarity index 95% rename from mace/kernels/arm/conv_2d_neon.h rename to mace/ops/arm/conv_2d_neon.h index bf0e1023..bd2307e2 100644 --- a/mace/kernels/arm/conv_2d_neon.h +++ b/mace/ops/arm/conv_2d_neon.h @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_ARM_CONV_2D_NEON_H_ -#define MACE_KERNELS_ARM_CONV_2D_NEON_H_ +#ifndef MACE_OPS_ARM_CONV_2D_NEON_H_ +#define MACE_OPS_ARM_CONV_2D_NEON_H_ #include "mace/core/types.h" -#include "mace/kernels/sgemm.h" +#include "mace/ops/sgemm.h" namespace mace { -namespace kernels { +namespace ops { void Conv2dNeonK1x1S1(const float *input, const float *filter, @@ -115,7 +115,7 @@ inline void Conv2dCPUKHxKWCalc(const float *in_ptr, } } -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ARM_CONV_2D_NEON_H_ +#endif // MACE_OPS_ARM_CONV_2D_NEON_H_ diff --git a/mace/kernels/arm/conv_2d_neon_15x1.cc b/mace/ops/arm/conv_2d_neon_15x1.cc similarity index 98% rename from mace/kernels/arm/conv_2d_neon_15x1.cc rename to mace/ops/arm/conv_2d_neon_15x1.cc index 0facfc6e..a4bae4e9 100644 --- a/mace/kernels/arm/conv_2d_neon_15x1.cc +++ b/mace/ops/arm/conv_2d_neon_15x1.cc @@ -16,11 +16,11 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include 
"mace/ops/arm/conv_2d_neon.h" #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { inline void Conv2dCPUK15x1Calc(const float *in_ptr, const float *filter_ptr, @@ -157,5 +157,5 @@ void Conv2dNeonK15x1S1(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_1x1.cc b/mace/ops/arm/conv_2d_neon_1x1.cc similarity index 94% rename from mace/kernels/arm/conv_2d_neon_1x1.cc rename to mace/ops/arm/conv_2d_neon_1x1.cc index 21554d90..be5c6b53 100644 --- a/mace/kernels/arm/conv_2d_neon_1x1.cc +++ b/mace/ops/arm/conv_2d_neon_1x1.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { void Conv2dNeonK1x1S1(const float *input, const float *filter, @@ -44,5 +44,5 @@ void Conv2dNeonK1x1S1(const float *input, } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_1x15.cc b/mace/ops/arm/conv_2d_neon_1x15.cc similarity index 98% rename from mace/kernels/arm/conv_2d_neon_1x15.cc rename to mace/ops/arm/conv_2d_neon_1x15.cc index 6fdc6ed8..06c40e29 100644 --- a/mace/kernels/arm/conv_2d_neon_1x15.cc +++ b/mace/ops/arm/conv_2d_neon_1x15.cc @@ -16,12 +16,12 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" #include "mace/utils/logging.h" #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { inline void Conv2dCPUK1x15Calc(const float *in_ptr, const float *filter_ptr, @@ -143,5 +143,5 @@ void Conv2dNeonK1x15S1(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_1x7.cc b/mace/ops/arm/conv_2d_neon_1x7.cc similarity index 99% rename from 
mace/kernels/arm/conv_2d_neon_1x7.cc rename to mace/ops/arm/conv_2d_neon_1x7.cc index 8a7b1a41..39321e0f 100644 --- a/mace/kernels/arm/conv_2d_neon_1x7.cc +++ b/mace/ops/arm/conv_2d_neon_1x7.cc @@ -16,10 +16,10 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { // Ho = 1, Wo = 4, Co = 4 void Conv2dNeonK1x7S1(const float *input, @@ -247,5 +247,5 @@ void Conv2dNeonK1x7S1(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_3x3.cc b/mace/ops/arm/conv_2d_neon_3x3.cc similarity index 99% rename from mace/kernels/arm/conv_2d_neon_3x3.cc rename to mace/ops/arm/conv_2d_neon_3x3.cc index 94551667..33653a42 100644 --- a/mace/kernels/arm/conv_2d_neon_3x3.cc +++ b/mace/ops/arm/conv_2d_neon_3x3.cc @@ -17,10 +17,10 @@ #endif #include "mace/core/macros.h" -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { // Ho = 2, Wo = 4, Co = 2 void Conv2dNeonK3x3S1(const float *input, @@ -658,5 +658,5 @@ void Conv2dNeonK3x3S2(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_5x5.cc b/mace/ops/arm/conv_2d_neon_5x5.cc similarity index 99% rename from mace/kernels/arm/conv_2d_neon_5x5.cc rename to mace/ops/arm/conv_2d_neon_5x5.cc index a60bec41..7803a89e 100644 --- a/mace/kernels/arm/conv_2d_neon_5x5.cc +++ b/mace/ops/arm/conv_2d_neon_5x5.cc @@ -16,10 +16,10 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { #define MACE_Conv2dNeonK5x5SnLoadCalc4 \ /* load filter (4 outch x 1 height x 4 width) */ \ @@ -215,5 +215,5 @@ void Conv2dNeonK5x5S1(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace 
diff --git a/mace/kernels/arm/conv_2d_neon_7x1.cc b/mace/ops/arm/conv_2d_neon_7x1.cc similarity index 99% rename from mace/kernels/arm/conv_2d_neon_7x1.cc rename to mace/ops/arm/conv_2d_neon_7x1.cc index 97d1bec2..37d9ec9d 100644 --- a/mace/kernels/arm/conv_2d_neon_7x1.cc +++ b/mace/ops/arm/conv_2d_neon_7x1.cc @@ -16,10 +16,10 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { // Ho = 4, Wo = 1, Co = 4 void Conv2dNeonK7x1S1(const float *input, @@ -287,5 +287,5 @@ void Conv2dNeonK7x1S1(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_2d_neon_7x7.cc b/mace/ops/arm/conv_2d_neon_7x7.cc similarity index 99% rename from mace/kernels/arm/conv_2d_neon_7x7.cc rename to mace/ops/arm/conv_2d_neon_7x7.cc index d824f881..4e1c0041 100644 --- a/mace/kernels/arm/conv_2d_neon_7x7.cc +++ b/mace/ops/arm/conv_2d_neon_7x7.cc @@ -16,10 +16,10 @@ #include #endif -#include "mace/kernels/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { #define MACE_Conv2dArmv8NeonK7x7SnLoadCalc4 \ /* load filter (4 outch x 1 height x 4 width) */ \ @@ -638,5 +638,5 @@ void Conv2dNeonK7x7S3(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_winograd.cc b/mace/ops/arm/conv_winograd.cc similarity index 99% rename from mace/kernels/arm/conv_winograd.cc rename to mace/ops/arm/conv_winograd.cc index d115e4e5..2f6207fd 100644 --- a/mace/kernels/arm/conv_winograd.cc +++ b/mace/ops/arm/conv_winograd.cc @@ -14,11 +14,11 @@ #include -#include "mace/kernels/arm/conv_winograd.h" -#include "mace/kernels/gemm.h" +#include "mace/ops/arm/conv_winograd.h" +#include "mace/ops/gemm.h" namespace mace { -namespace kernels { +namespace ops { namespace { // NCHW => NTCB (T: in tile pixels, B: tile indices) 
@@ -747,5 +747,5 @@ void ConvRef3x3s1(const float *input, } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/conv_winograd.h b/mace/ops/arm/conv_winograd.h similarity index 92% rename from mace/kernels/arm/conv_winograd.h rename to mace/ops/arm/conv_winograd.h index 7e274b77..5e07db15 100644 --- a/mace/kernels/arm/conv_winograd.h +++ b/mace/ops/arm/conv_winograd.h @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_ARM_CONV_WINOGRAD_H_ -#define MACE_KERNELS_ARM_CONV_WINOGRAD_H_ +#ifndef MACE_OPS_ARM_CONV_WINOGRAD_H_ +#define MACE_OPS_ARM_CONV_WINOGRAD_H_ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif #include "mace/core/types.h" -#include "mace/kernels/sgemm.h" +#include "mace/ops/sgemm.h" namespace mace { -namespace kernels { +namespace ops { void TransformFilter4x4(const float *filter, const index_t in_channels, @@ -70,7 +70,7 @@ void ConvRef3x3s1(const float *input, const index_t out_channels, float *output); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ARM_CONV_WINOGRAD_H_ +#endif // MACE_OPS_ARM_CONV_WINOGRAD_H_ diff --git a/mace/kernels/arm/conv_winograd_test.cc b/mace/ops/arm/conv_winograd_test.cc similarity index 91% rename from mace/kernels/arm/conv_winograd_test.cc rename to mace/ops/arm/conv_winograd_test.cc index ccb4f118..906dd3d9 100644 --- a/mace/kernels/arm/conv_winograd_test.cc +++ b/mace/ops/arm/conv_winograd_test.cc @@ -19,10 +19,10 @@ #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/kernels/arm/conv_winograd.h" +#include "mace/ops/arm/conv_winograd.h" namespace mace { -namespace kernels { +namespace ops { TEST(ConvWinogradTest, winograd) { index_t batch = 1; @@ -62,11 +62,11 @@ TEST(ConvWinogradTest, winograd) { return std::max(-1.0f, std::min(1.0f, nd(gen))); }); - kernels::ConvRef3x3s1(input_data, filter_data, 
batch, in_height, in_width, + ops::ConvRef3x3s1(input_data, filter_data, batch, in_height, in_width, in_channels, out_channels, output_data_ref); SGemm sgemm; - kernels::WinoGradConv3x3s1(input_data, filter_data, batch, in_height, + ops::WinoGradConv3x3s1(input_data, filter_data, batch, in_height, in_width, in_channels, out_channels, 6, output_data, &sgemm, nullptr); @@ -76,5 +76,5 @@ TEST(ConvWinogradTest, winograd) { } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/deconv_2d_neon.h b/mace/ops/arm/deconv_2d_neon.h similarity index 93% rename from mace/kernels/arm/deconv_2d_neon.h rename to mace/ops/arm/deconv_2d_neon.h index 1cddbf1a..d8abe427 100644 --- a/mace/kernels/arm/deconv_2d_neon.h +++ b/mace/ops/arm/deconv_2d_neon.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_ARM_DECONV_2D_NEON_H_ -#define MACE_KERNELS_ARM_DECONV_2D_NEON_H_ +#ifndef MACE_OPS_ARM_DECONV_2D_NEON_H_ +#define MACE_OPS_ARM_DECONV_2D_NEON_H_ #if defined(MACE_ENABLE_NEON) #include @@ -22,7 +22,7 @@ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { void Deconv2dNeonK3x3S1(const float *input, const float *filter, @@ -90,7 +90,7 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a, } #endif -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ARM_DECONV_2D_NEON_H_ +#endif // MACE_OPS_ARM_DECONV_2D_NEON_H_ diff --git a/mace/kernels/arm/deconv_2d_neon_3x3.cc b/mace/ops/arm/deconv_2d_neon_3x3.cc similarity index 99% rename from mace/kernels/arm/deconv_2d_neon_3x3.cc rename to mace/ops/arm/deconv_2d_neon_3x3.cc index cdba42c0..0495cf93 100644 --- a/mace/kernels/arm/deconv_2d_neon_3x3.cc +++ b/mace/ops/arm/deconv_2d_neon_3x3.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "mace/core/macros.h" -#include "mace/kernels/arm/deconv_2d_neon.h" +#include "mace/ops/arm/deconv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { void Deconv2dNeonK3x3S1(const float *input, const float *filter, @@ -387,5 +387,5 @@ void Deconv2dNeonK3x3S2(const float *input, } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/deconv_2d_neon_4x4.cc b/mace/ops/arm/deconv_2d_neon_4x4.cc similarity index 99% rename from mace/kernels/arm/deconv_2d_neon_4x4.cc rename to mace/ops/arm/deconv_2d_neon_4x4.cc index 575a8494..bddb56f5 100644 --- a/mace/kernels/arm/deconv_2d_neon_4x4.cc +++ b/mace/ops/arm/deconv_2d_neon_4x4.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "mace/core/macros.h" -#include "mace/kernels/arm/deconv_2d_neon.h" +#include "mace/ops/arm/deconv_2d_neon.h" namespace mace { -namespace kernels { +namespace ops { void Deconv2dNeonK4x4S1(const float *input, const float *filter, @@ -501,5 +501,5 @@ void Deconv2dNeonK4x4S2(const float *input, } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/arm/depthwise_conv2d_neon.h b/mace/ops/arm/depthwise_conv2d_neon.h similarity index 90% rename from mace/kernels/arm/depthwise_conv2d_neon.h rename to mace/ops/arm/depthwise_conv2d_neon.h index ec3fb360..cd475573 100644 --- a/mace/kernels/arm/depthwise_conv2d_neon.h +++ b/mace/ops/arm/depthwise_conv2d_neon.h @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ -#define MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ +#ifndef MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_ +#define MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { void DepthwiseConv2dNeonK3x3S1(const float *input, const float *filter, @@ -42,7 +42,7 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, const index_t valid_w_stop, float *output); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ +#endif // MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_ diff --git a/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc b/mace/ops/arm/depthwise_conv2d_neon_3x3.cc similarity index 99% rename from mace/kernels/arm/depthwise_conv2d_neon_3x3.cc rename to mace/ops/arm/depthwise_conv2d_neon_3x3.cc index 3a4491fb..2e997912 100644 --- a/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc +++ b/mace/ops/arm/depthwise_conv2d_neon_3x3.cc @@ -17,10 +17,10 @@ #endif #include "mace/core/macros.h" -#include "mace/kernels/arm/depthwise_conv2d_neon.h" +#include "mace/ops/arm/depthwise_conv2d_neon.h" namespace mace { -namespace kernels { +namespace ops { namespace { void DepthwiseConv2dPixel(const float *in_base, @@ -381,5 +381,5 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, } // b } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/batch_norm.cc b/mace/ops/batch_norm.cc similarity index 96% rename from mace/kernels/batch_norm.cc rename to mace/ops/batch_norm.cc index b07f2f43..07c00189 100644 --- a/mace/kernels/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -17,13 +17,13 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/activation.h" +#include "mace/ops/activation.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/batch_norm.h" +#include "mace/ops/opencl/image/batch_norm.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { 
+namespace ops { template class BatchNormOp; @@ -35,7 +35,7 @@ class BatchNormOp : public Operation { : Operation(context), epsilon_(Operation::GetOptionalArg("epsilon", static_cast(1e-4))), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) {} @@ -144,7 +144,7 @@ class BatchNormOp : public Operation { : Operation(context) { float epsilon = Operation::GetOptionalArg( "epsilon", static_cast(1e-4)); - ActivationType activation = kernels::StringToActivationType( + ActivationType activation = ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP")); float relux_max_limit = Operation::GetOptionalArg("max_limit", 0.0f); if (context->device()->opencl_runtime()->UseImageMemory()) { @@ -205,5 +205,5 @@ void RegisterBatchNorm(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index c390860e..814b631e 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -54,15 +53,15 @@ void BatchNorm( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormBM") .Input("InputImage") .Input("ScaleImage") diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 3c22d5ff..214fd507 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -51,15 +50,15 @@ void Simple() { net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -75,7 +74,7 @@ void Simple() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } // Check @@ -135,15 +134,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -165,7 +164,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -214,15 
+213,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -245,7 +244,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2); } @@ -294,15 +293,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -324,7 +323,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -373,15 +372,15 @@ TEST_F(BatchNormOpTest, 
ComplexRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -404,7 +403,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2); } diff --git a/mace/kernels/batch_to_space.cc b/mace/ops/batch_to_space.cc similarity index 99% rename from mace/kernels/batch_to_space.cc rename to mace/ops/batch_to_space.cc index 5df98aef..529a900b 100644 --- a/mace/kernels/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/batch_to_space.h" +#include "mace/ops/opencl/image/batch_to_space.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class BatchToSpaceOpBase : public Operation { public: @@ -303,5 +303,5 @@ void RegisterBatchToSpaceND(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index 4cf55b33..7ea19f6b 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions 
and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -42,7 +41,7 @@ void BMBatchToSpace( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") diff --git a/mace/kernels/bias_add.cc b/mace/ops/bias_add.cc similarity index 97% rename from mace/kernels/bias_add.cc rename to mace/ops/bias_add.cc index fc8b7374..9b528fa9 100644 --- a/mace/kernels/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -17,13 +17,13 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/activation.h" +#include "mace/ops/activation.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/bias_add.h" +#include "mace/ops/opencl/image/bias_add.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class BiasAddOp; @@ -139,5 +139,5 @@ void RegisterBiasAdd(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/bias_add_benchmark.cc b/mace/ops/bias_add_benchmark.cc index 5908caa2..dce361e9 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -47,9 +46,9 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) { .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddBM") .Input("InputImage") .Input("BiasImage") diff --git a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index 771065c2..ba31ccec 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -46,9 +45,9 @@ void BiasAddSimple() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -60,7 +59,7 @@ void BiasAddSimple() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -116,9 +115,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -131,7 +130,7 @@ 
TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5); } @@ -172,9 +171,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -187,7 +186,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5); } diff --git a/mace/kernels/buffer_inverse_transform.cc b/mace/ops/buffer_inverse_transform.cc similarity index 85% rename from mace/kernels/buffer_inverse_transform.cc rename to mace/ops/buffer_inverse_transform.cc index b447334c..8cfd72b5 100644 --- a/mace/kernels/buffer_inverse_transform.cc +++ b/mace/ops/buffer_inverse_transform.cc @@ -15,11 +15,11 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/opencl/buffer/buffer_inverse_transform.h" -#include "mace/kernels/opencl/image/image_to_buffer.h" +#include "mace/ops/opencl/buffer/buffer_inverse_transform.h" +#include "mace/ops/opencl/image/image_to_buffer.h" namespace mace { -namespace kernels { +namespace ops { template class BufferInverseTransformOp; @@ -41,9 +41,9 @@ class BufferInverseTransformOp : public Operation { const Tensor *input = this->Input(0); Tensor *output = this->Output(0); - kernels::BufferType type = - static_cast(Operation::GetOptionalArg( - "buffer_type", static_cast(kernels::CONV2D_FILTER))); + ops::BufferType type = + static_cast(Operation::GetOptionalArg( + "buffer_type", 
static_cast(ops::CONV2D_FILTER))); return kernel_->Compute(context, input, type, wino_blk_size_, output); @@ -63,5 +63,5 @@ void RegisterBufferInverseTransform(OpRegistryBase *op_registry) { BufferInverseTransformOp, DeviceType::GPU, half); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/buffer_to_image_benchmark.cc b/mace/ops/buffer_to_image_benchmark.cc index fb1cf51c..825ba105 100644 --- a/mace/ops/buffer_to_image_benchmark.cc +++ b/mace/ops/buffer_to_image_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc index 040e666e..fcf7e370 100644 --- a/mace/ops/buffer_to_image_test.cc +++ b/mace/ops/buffer_to_image_test.cc @@ -54,103 +54,103 @@ void TestBidirectionTransform(const int type, } // namespace TEST(BufferToImageTest, ArgSmall) { - TestBidirectionTransform(kernels::ARGUMENT, {1}); + TestBidirectionTransform(ops::ARGUMENT, {1}); } TEST(BufferToImageTest, ArgHalfSmall) { - TestBidirectionTransform(kernels::ARGUMENT, {11}); + TestBidirectionTransform(ops::ARGUMENT, {11}); } TEST(BufferToImageTest, ArgMedium) { - TestBidirectionTransform(kernels::ARGUMENT, {11}); + TestBidirectionTransform(ops::ARGUMENT, {11}); } TEST(BufferToImageTest, ArgLarge) { - TestBidirectionTransform(kernels::ARGUMENT, {256}); + TestBidirectionTransform(ops::ARGUMENT, {256}); } TEST(BufferToImageTest, InputSmallSingleChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::IN_OUT_CHANNEL, {1, 2, 3, 1}); } TEST(BufferToImageTest, InputSmallMultipleChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::IN_OUT_CHANNEL, {1, 2, 3, 3}); } 
TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::IN_OUT_CHANNEL, {3, 2, 3, 3}); } TEST(BufferToImageTest, InputMedium) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::IN_OUT_CHANNEL, {3, 13, 17, 128}); } TEST(BufferToImageTest, InputLarge) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::IN_OUT_CHANNEL, {3, 64, 64, 256}); } TEST(BufferToImageTest, Filter1x1Small) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {5, 3, 1, 1}); } TEST(BufferToImageTest, Filter1x1Medium) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {13, 17, 1, 1}); } TEST(BufferToImageTest, Filter1x1Large) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {512, 128, 1, 1}); } TEST(BufferToImageTest, Filter3x3Small) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {3, 5, 3, 3}); } TEST(BufferToImageTest, Filter3x3Medium) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {17, 13, 3, 3}); } TEST(BufferToImageTest, Filter3x3Large) { - TestBidirectionTransform(kernels::CONV2D_FILTER, + TestBidirectionTransform(ops::CONV2D_FILTER, {256, 128, 3, 3}); } TEST(BufferToImageTest, WeightWidthSmall) { - TestBidirectionTransform(kernels::WEIGHT_WIDTH, + TestBidirectionTransform(ops::WEIGHT_WIDTH, {1, 3, 3, 3}); } TEST(BufferToImageTest, WeightWidthMedium) { - TestBidirectionTransform(kernels::WEIGHT_WIDTH, + TestBidirectionTransform(ops::WEIGHT_WIDTH, {11, 13, 13, 17}); } TEST(BufferToImageTest, WeightWidthLarge) { - TestBidirectionTransform(kernels::WEIGHT_WIDTH, + TestBidirectionTransform(ops::WEIGHT_WIDTH, {64, 64, 11, 13}); } TEST(BufferToImageTest, WeightHeightSmall) { - 
TestBidirectionTransform(kernels::WEIGHT_HEIGHT, + TestBidirectionTransform(ops::WEIGHT_HEIGHT, {2, 1, 1, 1}); } TEST(BufferToImageTest, WeightHeightMedium) { - TestBidirectionTransform(kernels::WEIGHT_HEIGHT, + TestBidirectionTransform(ops::WEIGHT_HEIGHT, {11, 13, 13, 17}); } TEST(BufferToImageTest, WeightHeightLarge) { - TestBidirectionTransform(kernels::WEIGHT_HEIGHT, + TestBidirectionTransform(ops::WEIGHT_HEIGHT, {64, 16, 11, 13}); } @@ -188,7 +188,7 @@ void TestDiffTypeBidirectionTransform(const int type, } // namespace TEST(BufferToImageTest, ArgFloatToHalfSmall) { - TestDiffTypeBidirectionTransform(kernels::ARGUMENT, + TestDiffTypeBidirectionTransform(ops::ARGUMENT, {11}); } @@ -233,7 +233,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { const unsigned char input_data[] = { 0xCD, 0x3C, 0x33, 0x40, }; - TestStringHalfBidirectionTransform(kernels::ARGUMENT, + TestStringHalfBidirectionTransform(ops::ARGUMENT, {2}, input_data); } diff --git a/mace/kernels/buffer_transform.cc b/mace/ops/buffer_transform.cc similarity index 84% rename from mace/kernels/buffer_transform.cc rename to mace/ops/buffer_transform.cc index 2b14698c..cb127880 100644 --- a/mace/kernels/buffer_transform.cc +++ b/mace/ops/buffer_transform.cc @@ -15,11 +15,11 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/opencl/buffer/buffer_transform.h" -#include "mace/kernels/opencl/image/buffer_to_image.h" +#include "mace/ops/opencl/buffer/buffer_transform.h" +#include "mace/ops/opencl/image/buffer_to_image.h" namespace mace { -namespace kernels { +namespace ops { template class BufferTransformOp; @@ -41,9 +41,9 @@ class BufferTransformOp : public Operation { const Tensor *input = this->Input(0); Tensor *output = this->Output(0); - kernels::BufferType type = - static_cast(Operation::GetOptionalArg( - "buffer_type", static_cast(kernels::CONV2D_FILTER))); + ops::BufferType type = + static_cast(Operation::GetOptionalArg( + "buffer_type", static_cast(ops::CONV2D_FILTER))); return 
kernel_->Compute(context, input, type, wino_blk_size_, output); @@ -63,5 +63,5 @@ void RegisterBufferTransform(OpRegistryBase *op_registry) { BufferTransformOp, DeviceType::GPU, half); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/buffer_transform_test.cc b/mace/ops/buffer_transform_test.cc index aff6855f..c768d671 100644 --- a/mace/ops/buffer_transform_test.cc +++ b/mace/ops/buffer_transform_test.cc @@ -69,12 +69,12 @@ void TestBidirectionTransform(const int type, } // namespace TEST_F(BufferTransformTest, FloatToHalf) { - TestBidirectionTransform(kernels::BufferType::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::BufferType::IN_OUT_CHANNEL, {1, 2, 3, 4}); } TEST_F(BufferTransformTest, HalfToHalf) { - TestBidirectionTransform(kernels::BufferType::IN_OUT_CHANNEL, + TestBidirectionTransform(ops::BufferType::IN_OUT_CHANNEL, {1, 2, 3, 4}); } @@ -85,7 +85,7 @@ void TestArgumentTransform(const index_t input_size) { OpDefBuilder("BufferTransform", "BufferTransformTest") .Input("Input") .Output("Output") - .AddIntArg("buffer_type", kernels::BufferType::ARGUMENT) + .AddIntArg("buffer_type", ops::BufferType::ARGUMENT) .AddIntArg("T", DataTypeToEnum::value) .Finalize(net.NewOperatorDef()); diff --git a/mace/kernels/cast.cc b/mace/ops/cast.cc similarity index 97% rename from mace/kernels/cast.cc rename to mace/ops/cast.cc index 0bd971e1..f215d80f 100644 --- a/mace/kernels/cast.cc +++ b/mace/ops/cast.cc @@ -15,7 +15,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class CastOp : public Operation { @@ -57,5 +57,5 @@ void RegisterCast(OpRegistryBase *op_registry) { DeviceType::CPU, int32_t); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/cast_test.cc b/mace/ops/cast_test.cc index a0064993..666bc04e 100644 --- a/mace/ops/cast_test.cc +++ b/mace/ops/cast_test.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "gmock/gmock.h" -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/channel_shuffle.cc b/mace/ops/channel_shuffle.cc similarity index 97% rename from mace/kernels/channel_shuffle.cc rename to mace/ops/channel_shuffle.cc index 8258ea1c..78e6f7ad 100644 --- a/mace/kernels/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -16,11 +16,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/channel_shuffle.h" +#include "mace/ops/opencl/image/channel_shuffle.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class ChannelShuffleOp; @@ -115,5 +115,5 @@ void RegisterChannelShuffle(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index d45216eb..6707a5c9 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -44,7 +43,7 @@ void ChannelShuffle( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") .Input("InputImage") diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index 1ce0cea1..ca301a1f 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -61,7 +60,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") .Input("InputImage") @@ -74,7 +73,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // Check auto expected = net.CreateTensor( diff --git a/mace/kernels/concat.cc b/mace/ops/concat.cc similarity index 97% rename from mace/kernels/concat.cc rename to mace/ops/concat.cc index de501192..0cebac68 100644 --- a/mace/kernels/concat.cc +++ b/mace/ops/concat.cc @@ -18,11 +18,11 @@ #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/concat.h" +#include "mace/ops/opencl/image/concat.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class ConcatOpBase : public Operation { public: @@ -206,6 +206,9 @@ void RegisterConcat(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, float); + MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, + DeviceType::CPU, int32_t); + MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, uint8_t); @@ -218,5 +221,5 @@ void RegisterConcat(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 486d9b6e..02411591 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations 
under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -90,9 +89,9 @@ void OpenclConcatHelper(int iters, net.AddRandomInput("Input1", shape1); BufferToImage(&net, "Input0", "InputImage0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImage1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Concat", "ConcatBM") .Input("InputImage0") .Input("InputImage1") diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 431e7a2d..83307e78 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -262,7 +262,7 @@ void OpenclRandomTest(const std::vector> &shapes, net.AddInputFromArray(input_name, shapes[i], inputs[i]); BufferToImage(&net, input_name, image_name, - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto builder = OpDefBuilder("Concat", "ConcatTest"); @@ -279,7 +279,7 @@ void OpenclRandomTest(const std::vector> &shapes, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // Check auto output = net.GetOutput("Output"); diff --git a/mace/kernels/conv_2d.cc b/mace/ops/conv_2d.cc similarity index 98% rename from mace/kernels/conv_2d.cc rename to mace/ops/conv_2d.cc index c6edbff6..74234b5e 100644 --- a/mace/kernels/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -26,20 +26,20 @@ #include "mace/core/future.h" #include "mace/core/operator.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/arm/conv_2d_neon.h" -#include "mace/kernels/arm/conv_winograd.h" -#include "mace/kernels/conv_pool_2d_base.h" -#include "mace/kernels/conv_pool_2d_util.h" -#include "mace/kernels/gemmlowp_util.h" +#include "mace/ops/activation.h" +#include "mace/ops/arm/conv_2d_neon.h" +#include "mace/ops/arm/conv_winograd.h" 
+#include "mace/ops/conv_pool_2d_base.h" +#include "mace/ops/conv_pool_2d_util.h" +#include "mace/ops/gemmlowp_util.h" #include "mace/utils/utils.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/conv_2d.h" -#include "mace/kernels/opencl/buffer/conv_2d.h" +#include "mace/ops/opencl/image/conv_2d.h" +#include "mace/ops/opencl/buffer/conv_2d.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class Conv2dOp; @@ -49,7 +49,7 @@ class Conv2dOp : public ConvPool2dOpBase { public: explicit Conv2dOp(OpConstructContext *context) : ConvPool2dOpBase(context), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)), @@ -712,7 +712,7 @@ class Conv2dOp : public ConvPool2dOpBase { public: explicit Conv2dOp(OpConstructContext *context) : ConvPool2dOpBase(context), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) {} @@ -950,7 +950,7 @@ class Conv2dOp : public ConvPool2dOpBase { public: explicit Conv2dOp(OpConstructContext *context) : ConvPool2dOpBase(context), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) { @@ -999,5 +999,5 @@ void RegisterConv2D(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 76e3696d..96be2902 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -14,9 +14,8 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include 
"mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -66,11 +65,11 @@ void Conv2d(int iters, .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 28037011..db7f0458 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -15,7 +15,7 @@ #include #include -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -61,11 +61,11 @@ void TestNHWCSimple3x3VALID() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -81,7 +81,7 @@ void TestNHWCSimple3x3VALID() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; @@ -127,11 +127,11 @@ void TestNHWCSimple3x3SAME() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", 
"FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; @@ -213,9 +213,9 @@ void TestNHWCSimple3x3WithoutBias() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -230,7 +230,7 @@ void TestNHWCSimple3x3WithoutBias() { net.RunOp(D); // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -287,11 +287,11 @@ void TestNHWCCombined3x3() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -307,7 +307,7 @@ void TestNHWCCombined3x3() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -362,11 +362,11 @@ void TestFusedNHWCSimple3x3VALID() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, 
"Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") .Input("FilterImage") @@ -383,7 +383,7 @@ void TestFusedNHWCSimple3x3VALID() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; @@ -425,9 +425,9 @@ void TestFusedNHWCSimple3x3WithoutBias() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -443,7 +443,7 @@ void TestFusedNHWCSimple3x3WithoutBias() { net.RunOp(D); // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -505,11 +505,11 @@ void TestConv1x1() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -524,7 +524,7 @@ void TestConv1x1() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ 
-596,11 +596,11 @@ void TestComplexConvNxNS12(const std::vector &shape, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -616,7 +616,7 @@ void TestComplexConvNxNS12(const std::vector &shape, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); }; @@ -705,11 +705,11 @@ void TestHalfComplexConvNxNS12(const std::vector &input_shape, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -725,7 +725,7 @@ void TestHalfComplexConvNxNS12(const std::vector &input_shape, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); @@ -857,11 +857,11 @@ void TestDilationConvNxN(const std::vector &shape, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + 
ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -877,7 +877,7 @@ void TestDilationConvNxN(const std::vector &shape, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); }; @@ -954,11 +954,11 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -974,7 +974,7 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-1); }; @@ -1041,11 +1041,11 @@ void TestArbitraryPadConvNxN(const std::vector &shape, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -1060,7 +1060,7 @@ void TestArbitraryPadConvNxN(const std::vector &shape, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); }; diff --git 
a/mace/kernels/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h similarity index 85% rename from mace/kernels/conv_pool_2d_base.h rename to mace/ops/conv_pool_2d_base.h index d1e59c61..2d886faa 100644 --- a/mace/kernels/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -12,16 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_CONV_POOL_2D_BASE_H_ -#define MACE_KERNELS_CONV_POOL_2D_BASE_H_ +#ifndef MACE_OPS_CONV_POOL_2D_BASE_H_ +#define MACE_OPS_CONV_POOL_2D_BASE_H_ #include #include "mace/core/operator.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { -namespace kernels { +namespace ops { class ConvPool2dOpBase : public Operation { public: @@ -40,7 +40,7 @@ class ConvPool2dOpBase : public Operation { std::vector dilations_; }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_CONV_POOL_2D_BASE_H_ +#endif // MACE_OPS_CONV_POOL_2D_BASE_H_ diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/ops/conv_pool_2d_util.cc similarity index 99% rename from mace/kernels/conv_pool_2d_util.cc rename to mace/ops/conv_pool_2d_util.cc index c4669f4c..6ec025b9 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/ops/conv_pool_2d_util.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include #include #include namespace mace { -namespace kernels { +namespace ops { void CalcPaddingAndOutputSize(const index_t *input_shape, const DataFormat input_format, @@ -463,5 +463,5 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input_tensor, return MaceStatus::MACE_SUCCESS; } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/ops/conv_pool_2d_util.h similarity index 96% rename from mace/kernels/conv_pool_2d_util.h rename to mace/ops/conv_pool_2d_util.h index e735a97d..0e45c31e 100644 --- a/mace/kernels/conv_pool_2d_util.h +++ b/mace/ops/conv_pool_2d_util.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_CONV_POOL_2D_UTIL_H_ -#define MACE_KERNELS_CONV_POOL_2D_UTIL_H_ +#ifndef MACE_OPS_CONV_POOL_2D_UTIL_H_ +#define MACE_OPS_CONV_POOL_2D_UTIL_H_ #include "mace/core/tensor.h" @@ -30,7 +30,7 @@ enum RoundType { CEIL = 1, }; -namespace kernels { +namespace ops { void CalcPaddingAndOutputSize(const index_t *input_shape, const DataFormat input_format, @@ -113,7 +113,7 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input, Tensor *output_tensor, bool padding_same_value = false); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_ +#endif // MACE_OPS_CONV_POOL_2D_UTIL_H_ diff --git a/mace/ops/core_test.cc b/mace/ops/core_test.cc index 5afd621f..3e3185b3 100644 --- a/mace/ops/core_test.cc +++ b/mace/ops/core_test.cc @@ -29,7 +29,7 @@ TEST(CoreTest, INIT_MODE) { OpDefBuilder("BufferTransform", "BufferTransformTest") .Input("Input") .Output("B2IOutput") - .AddIntArg("buffer_type", kernels::BufferType::CONV2D_FILTER) + .AddIntArg("buffer_type", ops::BufferType::CONV2D_FILTER) .AddIntArg("mode", static_cast(NetMode::INIT)) 
.Finalize(&op_defs[op_defs.size() - 1]); @@ -46,18 +46,16 @@ TEST(CoreTest, INIT_MODE) { OpDefBuilder("BufferInverseTransform", "BufferInverseTransformTest") .Input("B2IOutput") .Output("Output") - .AddIntArg("buffer_type", kernels::BufferType::CONV2D_FILTER) + .AddIntArg("buffer_type", ops::BufferType::CONV2D_FILTER) .Finalize(&op_defs[op_defs.size() - 1]); NetDef net_def; for (auto &op_def : op_defs) { net_def.add_op()->CopyFrom(op_def); - net_def.add_op_types(op_def.type()); } - std::shared_ptr op_def_registry(new OpDefRegistry()); - std::shared_ptr op_registry(new OpRegistry()); + std::shared_ptr op_registry(new OpRegistry()); auto net = std::unique_ptr(new SerialNet( - op_def_registry.get(), op_registry.get(), &net_def, &ws, device, + op_registry.get(), &net_def, &ws, device, NetMode::INIT)); MaceStatus status = net->Init(); MACE_CHECK(status == MaceStatus::MACE_SUCCESS); @@ -67,7 +65,7 @@ TEST(CoreTest, INIT_MODE) { EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr); EXPECT_TRUE(ws.GetTensor("Output") == nullptr); net = std::unique_ptr(new SerialNet( - op_def_registry.get(), op_registry.get(), &net_def, &ws, device)); + op_registry.get(), &net_def, &ws, device)); status = net->Init(); MACE_CHECK(status == MaceStatus::MACE_SUCCESS); status = net->Run(); diff --git a/mace/kernels/crop.cc b/mace/ops/crop.cc similarity index 98% rename from mace/kernels/crop.cc rename to mace/ops/crop.cc index 6b1ffa6a..b056f21c 100644 --- a/mace/kernels/crop.cc +++ b/mace/ops/crop.cc @@ -16,11 +16,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/crop.h" +#include "mace/ops/opencl/image/crop.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class CropOp : public Operation { @@ -143,5 +143,5 @@ void RegisterCrop(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/crop_benchmark.cc 
b/mace/ops/crop_benchmark.cc index 75cd494f..b186cecc 100644 --- a/mace/ops/crop_benchmark.cc +++ b/mace/ops/crop_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -81,9 +80,9 @@ void OpenclCropHelper(int iters, net.AddRandomInput("Input1", shape1); BufferToImage(&net, "Input0", "InputImage0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImage1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Crop", "CropBM") .Input("InputImage0") .Input("InputImage1") diff --git a/mace/ops/crop_test.cc b/mace/ops/crop_test.cc index 67a2fdeb..efada981 100644 --- a/mace/ops/crop_test.cc +++ b/mace/ops/crop_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -36,9 +35,9 @@ void RunCrop(const std::vector &input_shape, if (D == GPU) { BufferToImage(&net, "Input0", "InputImage0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImage1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Crop", "CropTest") .Input("InputImage0") .Input("InputImage1") @@ -69,7 +68,7 @@ void RunCrop(const std::vector &input_shape, if (D == GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else if (D == CPU) { net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); diff --git a/mace/kernels/deconv_2d.cc b/mace/ops/deconv_2d.cc similarity index 97% rename from mace/kernels/deconv_2d.cc rename to mace/ops/deconv_2d.cc index 44c0c119..0bfa8200 100644 --- a/mace/kernels/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/deconv_2d.h" +#include "mace/ops/deconv_2d.h" #if defined(MACE_ENABLE_NEON) #include @@ -27,16 +27,16 @@ #include "mace/core/future.h" #include "mace/core/operator.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/arm/deconv_2d_neon.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/activation.h" +#include "mace/ops/arm/deconv_2d_neon.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/utils/utils.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/deconv_2d.h" +#include "mace/ops/opencl/image/deconv_2d.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class Deconv2dOpBase : public Operation { public: @@ -46,9 +46,9 @@ class Deconv2dOpBase : public Operation { padding_type_(static_cast(Operation::GetOptionalArg( "padding", static_cast(SAME)))), paddings_(Operation::GetRepeatedArgs("padding_values")), - model_type_(static_cast( + model_type_(static_cast( Operation::GetOptionalArg("framework_type", 0))), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) {} @@ -180,7 +180,7 @@ class Deconv2dOp : public Deconv2dOpBase { const Tensor *filter = this->Input(1); const Tensor *bias = nullptr; const Tensor *output_shape_tensor = nullptr; - if (model_type_ == kernels::CAFFE) { + if (model_type_ == ops::CAFFE) { bias = this->InputSize() >= 3 ? this->Input(2) : nullptr; } else { output_shape_tensor = @@ -491,7 +491,7 @@ class Deconv2dOp : public Deconv2dOpBase { const Tensor *filter = this->Input(1); const Tensor *bias = nullptr; const Tensor *output_shape_tensor = nullptr; - if (model_type_ == kernels::CAFFE) { + if (model_type_ == ops::CAFFE) { bias = this->InputSize() >= 3 ? 
this->Input(2) : nullptr; } else { output_shape_tensor = @@ -557,5 +557,5 @@ void RegisterDeconv2D(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/deconv_2d.h b/mace/ops/deconv_2d.h similarity index 82% rename from mace/kernels/deconv_2d.h rename to mace/ops/deconv_2d.h index 25413d98..35dcee8b 100644 --- a/mace/kernels/deconv_2d.h +++ b/mace/ops/deconv_2d.h @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_DECONV_2D_H_ -#define MACE_KERNELS_DECONV_2D_H_ +#ifndef MACE_OPS_DECONV_2D_H_ +#define MACE_OPS_DECONV_2D_H_ namespace mace { -namespace kernels { +namespace ops { enum FrameworkType { TENSORFLOW = 0, CAFFE = 1, }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_DECONV_2D_H_ +#endif // MACE_OPS_DECONV_2D_H_ diff --git a/mace/ops/deconv_2d_benchmark.cc b/mace/ops/deconv_2d_benchmark.cc index 197e8f73..175feaca 100644 --- a/mace/ops/deconv_2d_benchmark.cc +++ b/mace/ops/deconv_2d_benchmark.cc @@ -14,9 +14,8 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -54,11 +53,11 @@ static void Deconv2d(int iters, {batch, out_h, out_w, output_channels}); if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputImage") .Input("FilterImage") diff --git a/mace/ops/deconv_2d_test.cc 
b/mace/ops/deconv_2d_test.cc index 88476414..a33b2f7b 100644 --- a/mace/ops/deconv_2d_test.cc +++ b/mace/ops/deconv_2d_test.cc @@ -15,8 +15,8 @@ #include #include -#include "mace/kernels/deconv_2d.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/deconv_2d.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -38,7 +38,7 @@ void RunTestSimple(const std::vector &input_shape, const std::vector &filter_data, const std::vector &expected_shape, const std::vector &expected_data, - kernels::FrameworkType model_type) { + ops::FrameworkType model_type) { OpsTestNet net; // Add input data const index_t batch = input_shape[0]; @@ -50,12 +50,12 @@ void RunTestSimple(const std::vector &input_shape, net.TransformDataFormat("Filter", HWOI, "FilterOIHW", OIHW); if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "FilterOIHW", "FilterImage", - kernels::BufferType::CONV2D_FILTER); - if (model_type == kernels::FrameworkType::CAFFE) { + ops::BufferType::CONV2D_FILTER); + if (model_type == ops::FrameworkType::CAFFE) { OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputImage") .Input("FilterImage") @@ -85,12 +85,12 @@ void RunTestSimple(const std::vector &input_shape, // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); - if (model_type == kernels::FrameworkType::CAFFE) { + if (model_type == ops::FrameworkType::CAFFE) { OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputNCHW") .Input("FilterOIHW") @@ -138,7 +138,7 @@ void TestNHWCSimple3x3SAME_S1() { {4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 9.5, 9.6, 9.7, 6.5, 6.6, 6.7, 4.5, 4.6, 
4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); RunTestSimple({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0}, 1, Padding::VALID, {2, 2}, {0}, {3, 3, 3, 1}, @@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME_S1() { {1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, 9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}, - kernels::FrameworkType::CAFFE); + ops::FrameworkType::CAFFE); RunTestSimple({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, 1, Padding::SAME, {}, {1, 3, 3, 3}, {3, 3, 3, 1}, @@ -157,7 +157,7 @@ void TestNHWCSimple3x3SAME_S1() { {54, 66, 78, 126, 147, 168, 130, 146, 162, 198, 225, 252, 405, 450, 495, 366, 399, 432, 354, 378, 402, 630, 669, 708, 502, 530, 558}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); RunTestSimple({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, 1, Padding::SAME, {2, 2}, {0}, {3, 3, 3, 1}, @@ -167,7 +167,7 @@ void TestNHWCSimple3x3SAME_S1() { {54, 66, 78, 126, 147, 168, 130, 146, 162, 198, 225, 252, 405, 450, 495, 366, 399, 432, 354, 378, 402, 630, 669, 708, 502, 530, 558}, - kernels::FrameworkType::CAFFE); + ops::FrameworkType::CAFFE); } template @@ -185,7 +185,7 @@ void TestNHWCSimple3x3SAME_S2() { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); RunTestSimple({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0}, 2, Padding::SAME, {2, 2}, {0}, {3, 3, 3, 1}, @@ -198,7 +198,7 @@ void TestNHWCSimple3x3SAME_S2() { 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}, - kernels::FrameworkType::CAFFE); + ops::FrameworkType::CAFFE); RunTestSimple({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, 2, Padding::SAME, {}, {1, 6, 6, 3}, {3, 3, 3, 1}, @@ -216,7 +216,7 @@ void 
TestNHWCSimple3x3SAME_S2() { 83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168, 304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); RunTestSimple({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, 2, Padding::SAME, {2, 2}, {0}, {3, 3, 3, 1}, @@ -229,7 +229,7 @@ void TestNHWCSimple3x3SAME_S2() { 140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300, 142, 155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105, 192, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}, - kernels::FrameworkType::CAFFE); + ops::FrameworkType::CAFFE); } template @@ -246,7 +246,7 @@ void TestNHWCSimple3x3SAME_S2_1() { 18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18, 30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, 12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); } template @@ -271,7 +271,7 @@ void TestNHWCSimple3x3VALID_S2() { 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); } template @@ -288,7 +288,7 @@ void TestNHWCSimple3x3VALID_S1() { 366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402, 630, 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147, 306, 321, 336, 522, 546, 570, 398, 415, 432, 225, 234, 243}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); } template @@ -297,7 +297,7 @@ void TestNHWCSimple2x2SAME() { {1, 2, 2, 1}, {3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f}, - kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); } template @@ -308,7 +308,7 @@ void TestNHWCSimple2x2VALID() { {1, 5, 5, 1}, {1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f, 2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f}, 
- kernels::FrameworkType::TENSORFLOW); + ops::FrameworkType::TENSORFLOW); } } // namespace @@ -397,11 +397,11 @@ void TestComplexDeconvNxNS12(const int batch, std::vector paddings; std::vector output_shape; - kernels::FrameworkType model_type = + ops::FrameworkType model_type = padding < 0 ? - kernels::FrameworkType::TENSORFLOW : kernels::FrameworkType::CAFFE; + ops::FrameworkType::TENSORFLOW : ops::FrameworkType::CAFFE; - if (model_type == kernels::FrameworkType::TENSORFLOW) { + if (model_type == ops::FrameworkType::TENSORFLOW) { if (type == Padding::SAME) { out_h = (height - 1) * stride_h + 1; out_w = (width - 1) * stride_w + 1; @@ -421,7 +421,7 @@ void TestComplexDeconvNxNS12(const int batch, paddings.push_back(padding); } - if (model_type == kernels::FrameworkType::CAFFE) { + if (model_type == ops::FrameworkType::CAFFE) { OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputNCHW") .Input("Filter") @@ -458,13 +458,13 @@ void TestComplexDeconvNxNS12(const int batch, // run on gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); + ops::BufferType::CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); - if (model_type == kernels::FrameworkType::CAFFE) { + if (model_type == ops::FrameworkType::CAFFE) { OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputImage") .Input("FilterImage") @@ -492,7 +492,7 @@ void TestComplexDeconvNxNS12(const int batch, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); }; diff --git a/mace/kernels/depth_to_space.cc b/mace/ops/depth_to_space.cc similarity index 97% rename from mace/kernels/depth_to_space.cc rename to mace/ops/depth_to_space.cc index 
cd10b2b0..be7a2f82 100644 --- a/mace/kernels/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/depth_to_space.h" +#include "mace/ops/opencl/image/depth_to_space.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class DepthToSpaceOp : public Operation { @@ -127,5 +127,5 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/depth_to_space_benchmark.cc b/mace/ops/depth_to_space_benchmark.cc index 45bc6036..822bf8f0 100644 --- a/mace/ops/depth_to_space_benchmark.cc +++ b/mace/ops/depth_to_space_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -44,7 +43,7 @@ void DepthToSpace( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("DepthToSpace", "DepthToSpaceBM") .Input("InputImage") diff --git a/mace/ops/depth_to_space_test.cc b/mace/ops/depth_to_space_test.cc index fdce99c1..aa9b9c28 100644 --- a/mace/ops/depth_to_space_test.cc +++ b/mace/ops/depth_to_space_test.cc @@ -15,7 +15,6 @@ #include #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -47,7 +46,7 @@ void RunDepthToSpace(const std::vector &input_shape, } else { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("DepthToSpace", "DepthToSpaceTest") .Input("InputImage") .Output("OutputImage") @@ -59,7 +58,7 @@ void RunDepthToSpace(const std::vector 
&input_shape, if (D == DeviceType::GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto expected = net.CreateTensor(expected_shape, expected_data); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); @@ -136,7 +135,7 @@ void RandomTest(const int block_size, NHWC); BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("DepthToSpace", "DepthToSpaceTest") .Input("InputImg") @@ -149,7 +148,7 @@ void RandomTest(const int block_size, net.RunOp(D); ImageToBuffer(&net, "OutputImg", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*net.GetTensor("Output"), diff --git a/mace/kernels/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc similarity index 98% rename from mace/kernels/depthwise_conv2d.cc rename to mace/ops/depthwise_conv2d.cc index 74def6cf..76eee2f2 100644 --- a/mace/kernels/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -26,24 +26,24 @@ #include "mace/core/future.h" #include "mace/core/operator.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/arm/depthwise_conv2d_neon.h" -#include "mace/kernels/conv_pool_2d_base.h" +#include "mace/ops/activation.h" +#include "mace/ops/arm/depthwise_conv2d_neon.h" +#include "mace/ops/conv_pool_2d_base.h" #include "mace/public/mace.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/depthwise_conv2d.h" -#include "mace/kernels/opencl/buffer/depthwise_conv2d.h" +#include "mace/ops/opencl/image/depthwise_conv2d.h" +#include "mace/ops/opencl/buffer/depthwise_conv2d.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class DepthwiseConv2dOpBase : public ConvPool2dOpBase { public: explicit DepthwiseConv2dOpBase(OpConstructContext *context) : ConvPool2dOpBase(context), 
- activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) {} @@ -532,5 +532,5 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/depthwise_conv2d_benchmark.cc b/mace/ops/depthwise_conv2d_benchmark.cc index 3257e580..54f3e8b7 100644 --- a/mace/ops/depthwise_conv2d_benchmark.cc +++ b/mace/ops/depthwise_conv2d_benchmark.cc @@ -14,9 +14,8 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -81,11 +80,11 @@ void DepthwiseConv2d(int iters, .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::DW_CONV2D_FILTER); + ops::BufferType::DW_CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2dTest") .Input("InputImage") .Input("FilterImage") diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index 3089286c..dfb76b44 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -53,11 +53,11 @@ void SimpleValidTest() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::DW_CONV2D_FILTER); + ops::BufferType::DW_CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") .Input("InputImage") .Input("FilterImage") @@ -73,7 +73,7 @@ void SimpleValidTest() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; @@ -150,11 +150,11 @@ void ComplexValidTest(index_t batch, "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::DW_CONV2D_FILTER); + ops::BufferType::DW_CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") .Input("InputImage") .Input("FilterImage") @@ -170,7 +170,7 @@ void ComplexValidTest(index_t batch, // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; @@ -290,11 +290,11 @@ void TestNxNS12(const index_t height, const index_t width) { expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - 
kernels::BufferType::DW_CONV2D_FILTER); + ops::BufferType::DW_CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") .Input("InputImage") .Input("FilterImage") @@ -312,7 +312,7 @@ void TestNxNS12(const index_t height, const index_t width) { // Transfer output ImageToBuffer(&net, "OutputImage", "DeviceOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // Check if (DataTypeToEnum::value == DT_FLOAT) { diff --git a/mace/kernels/eltwise.cc b/mace/ops/eltwise.cc similarity index 98% rename from mace/kernels/eltwise.cc rename to mace/ops/eltwise.cc index e33006ea..bb7532cc 100644 --- a/mace/kernels/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" #include #include @@ -26,11 +26,11 @@ #include "mace/core/tensor.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/eltwise.h" +#include "mace/ops/opencl/image/eltwise.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { inline index_t GetIndex(const std::vector &shape, @@ -792,8 +792,8 @@ class EltwiseOp : public Operation { public: explicit EltwiseOp(OpConstructContext *context) : Operation(context), - type_(static_cast(Operation::GetOptionalArg( - "type", static_cast(kernels::EltwiseType::NONE)))), + type_(static_cast(Operation::GetOptionalArg( + "type", static_cast(ops::EltwiseType::NONE)))), coeff_(Operation::GetRepeatedArgs("coeff")), scalar_input_(Operation::GetOptionalArg("scalar_input", 1.0)), scalar_input_index_(Operation::GetOptionalArg( @@ -934,8 +934,8 @@ class EltwiseOp : public Operation { public: explicit EltwiseOp(OpConstructContext *context) : Operation(context), - 
type_(static_cast(Operation::GetOptionalArg( - "type", static_cast(kernels::EltwiseType::NONE)))), + type_(static_cast(Operation::GetOptionalArg( + "type", static_cast(ops::EltwiseType::NONE)))), coeff_(Operation::GetRepeatedArgs("coeff")), scalar_input_(Operation::GetOptionalArg("scalar_input", 1.0)), scalar_input_index_(Operation::GetOptionalArg( @@ -1076,9 +1076,9 @@ class EltwiseOp : public Operation { public: explicit EltwiseOp(OpConstructContext *context) : Operation(context) { - EltwiseType type = static_cast( + EltwiseType type = static_cast( Operation::GetOptionalArg( - "type", static_cast(kernels::EltwiseType::NONE))); + "type", static_cast(ops::EltwiseType::NONE))); std::vector coeff = Operation::GetRepeatedArgs("coeff"); float scalar_input = Operation::GetOptionalArg("scalar_input", 1.0); int32_t scalar_input_index = Operation::GetOptionalArg( @@ -1121,5 +1121,5 @@ void RegisterEltwise(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/eltwise.h b/mace/ops/eltwise.h similarity index 86% rename from mace/kernels/eltwise.h rename to mace/ops/eltwise.h index b71f4e42..31ee93f5 100644 --- a/mace/kernels/eltwise.h +++ b/mace/ops/eltwise.h @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_ELTWISE_H_ -#define MACE_KERNELS_ELTWISE_H_ +#ifndef MACE_OPS_ELTWISE_H_ +#define MACE_OPS_ELTWISE_H_ namespace mace { -namespace kernels { +namespace ops { enum EltwiseType { SUM = 0, @@ -35,7 +35,7 @@ enum EltwiseType { inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; } -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_ELTWISE_H_ +#endif // MACE_OPS_ELTWISE_H_ diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index 4a8fa041..82fbc63f 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -14,9 +14,8 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -26,7 +25,7 @@ namespace test { namespace { template void EltwiseBenchmark( - int iters, kernels::EltwiseType type, int n, int h, int w, int c) { + int iters, ops::EltwiseType type, int n, int h, int w, int c) { mace::testing::StopTiming(); OpsTestNet net; @@ -36,9 +35,9 @@ void EltwiseBenchmark( if (D == DeviceType::GPU) { BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -84,7 +83,7 @@ void EltwiseBenchmark( mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ EltwiseBenchmark( \ - iters, static_cast(ELT_TYPE), N, H, W, C); \ + iters, static_cast(ELT_TYPE), N, H, W, C); \ } \ MACE_BENCHMARK( \ MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index da9687ce..ac920ac0 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc 
@@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/eltwise.h" -#include "mace/core/op_def_registry.h" +#include + +#include "mace/ops/eltwise.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -24,7 +25,7 @@ class EltwiseOpTest : public OpsTestBase {}; namespace { template -void SimpleScalarScalar(const kernels::EltwiseType type, +void SimpleScalarScalar(const ops::EltwiseType type, const T input, const float x, const DstType output) { @@ -40,7 +41,7 @@ void SimpleScalarScalar(const kernels::EltwiseType type, .AddIntArg("T", DataTypeToEnum::v()) .AddIntArg("type", static_cast(type)) .AddFloatArg("scalar_input", x) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) .Output("Output") .Finalize(net.NewOperatorDef()); // Run @@ -55,7 +56,7 @@ void SimpleScalarScalar(const kernels::EltwiseType type, } template -void SimpleTensorScalar(const kernels::EltwiseType type, +void SimpleTensorScalar(const ops::EltwiseType type, const std::vector &shape, const std::vector &input, const float x, @@ -74,7 +75,7 @@ void SimpleTensorScalar(const kernels::EltwiseType type, .AddIntArg("type", static_cast(type)) .AddFloatArg("scalar_input", x) .AddIntArg("data_format", DataFormat::NCHW) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? 
DT_INT32 : DT_FLOAT}) .Output("TOutput") .Finalize(net.NewOperatorDef()); // Run @@ -82,7 +83,7 @@ void SimpleTensorScalar(const kernels::EltwiseType type, net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); } else { BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg") .AddIntArg("type", static_cast(type)) @@ -94,7 +95,7 @@ void SimpleTensorScalar(const kernels::EltwiseType type, net.RunOp(D); ImageToBuffer(&net, "OutputImg", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto expected = net.CreateTensor(shape, output); @@ -103,7 +104,7 @@ void SimpleTensorScalar(const kernels::EltwiseType type, } template -void SimpleTensorEltwise(const kernels::EltwiseType type, +void SimpleTensorEltwise(const ops::EltwiseType type, const std::vector &shape0, const std::vector &input0, const std::vector &shape1, @@ -124,7 +125,7 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", coeff) .AddIntArg("data_format", DataFormat::NCHW) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? 
DT_INT32 : DT_FLOAT}) .Output("TOutput"); if (shape0.size() > 1) { net.TransformDataFormat("Input0", NHWC, "TInput0", NCHW); @@ -145,9 +146,9 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); } else { BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -160,7 +161,7 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, net.RunOp(D); ImageToBuffer(&net, "OutputImg", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } std::vector output_shape = shape0; @@ -173,7 +174,7 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, } template -void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, +void TensorGeneralBroadcastEltwise(const ops::EltwiseType type, const std::vector &shape0, const std::vector &input0, const std::vector &shape1, @@ -196,7 +197,7 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, .Input("Input1") .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", coeff) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? 
DT_INT32 : DT_FLOAT}) .Output("Output"); op_builder.Finalize(net.NewOperatorDef()); @@ -204,9 +205,9 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, net.RunOp(D); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input0", "InputImage0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImage1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto op_builder = OpDefBuilder("Eltwise", "EltwiseTest") .AddIntArg("T", DataTypeToEnum::v()) @@ -214,7 +215,7 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, .Input("InputImage1") .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", coeff) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) .Output("OutputImage"); op_builder.Finalize(net.NewOperatorDef()); @@ -222,7 +223,7 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -234,249 +235,249 @@ void TensorGeneralBroadcastEltwise(const kernels::EltwiseType type, TEST_F(EltwiseOpTest, CPUSimpleScalarScalar) { SimpleScalarScalar( - kernels::EltwiseType::SUM, 1, 2, 3); + ops::EltwiseType::SUM, 1, 2, 3); SimpleScalarScalar( - kernels::EltwiseType::SUB, 1, 2, -1); + ops::EltwiseType::SUB, 1, 2, -1); SimpleScalarScalar( - kernels::EltwiseType::PROD, 1, 2, 2); + ops::EltwiseType::PROD, 1, 2, 2); SimpleScalarScalar( - kernels::EltwiseType::DIV, 1, 2, 0.5); + ops::EltwiseType::DIV, 1, 2, 0.5); SimpleScalarScalar( - kernels::EltwiseType::MIN, 1, 2, 1); + ops::EltwiseType::MIN, 1, 2, 1); SimpleScalarScalar( - kernels::EltwiseType::MAX, 1, 2, 2); + ops::EltwiseType::MAX, 1, 2, 2); SimpleScalarScalar( - kernels::EltwiseType::NEG, 1, 2, -1); + ops::EltwiseType::NEG, 1, 2, -1); 
SimpleScalarScalar( - kernels::EltwiseType::ABS, -1, 3, 1); + ops::EltwiseType::ABS, -1, 3, 1); SimpleScalarScalar( - kernels::EltwiseType::EQUAL, 1, 3, 0); + ops::EltwiseType::EQUAL, 1, 3, 0); SimpleScalarScalar( - kernels::EltwiseType::EQUAL, 3, 3, 1); + ops::EltwiseType::EQUAL, 3, 3, 1); } TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { - SimpleTensorScalar(kernels::EltwiseType::SUM, + SimpleTensorScalar(ops::EltwiseType::SUM, {1, 1, 1, 1}, {1}, 1, {2}); SimpleTensorScalar( - kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {0, 1, 2, 3, 4, 5}); SimpleTensorScalar( - kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 2, + ops::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 2, {2, 4, 6, 8, 10, 12}); SimpleTensorScalar( - kernels::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, + ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {1, 1, 1, 1, 1, 1}); SimpleTensorScalar( - kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, + ops::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, {3, 3, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::NEG, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, + ops::EltwiseType::NEG, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, {-1, -2, -3, -4, -5, -6}); SimpleTensorScalar( - kernels::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, + ops::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {0, 1, 4, 9, 16, 25}); SimpleTensorScalar( - kernels::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, + ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 
3, {0, 0, 1, 0, 0, 0}); } TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) { - SimpleTensorScalar(kernels::EltwiseType::SUM, + SimpleTensorScalar(ops::EltwiseType::SUM, {1, 1, 1, 1}, {1}, 1, {2}); SimpleTensorScalar( - kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {0, 1, 2, 3, 4, 5}); SimpleTensorScalar( - kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 2, + ops::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 2, {2, 4, 6, 8, 10, 12}); SimpleTensorScalar( - kernels::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, + ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {1, 1, 1, 1, 1, 1}); SimpleTensorScalar( - kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, + ops::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, {3, 3, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::NEG, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, + ops::EltwiseType::NEG, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 3, {-1, -2, -3, -4, -5, -6}); SimpleTensorScalar( - kernels::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, + ops::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar( - kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, + ops::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, {0, 1, 4, 9, 16, 25}); } TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, {1, 2, 3}, {2, 4, 6, 5, 7, 9}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 
10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, + ops::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 4, 10, 18}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, + ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + ops::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); SimpleTensorEltwise( - kernels::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, {1, 2, 3}, {1, 1, 1, 0, 0, 0}); SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 
6}, {3}, {1, 2, 3}, {2, 4, 6, 5, 7, 9}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::SUB, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, {3}, {1, 2, 3}, {1, 2, 1, 3}, + ops::EltwiseType::PROD, {3}, {1, 2, 3}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 4, 10, 18}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, + ops::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::SQR_DIFF, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); SimpleTensorEltwise( - kernels::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {3}, + ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {3}, {1, 2, 3}, {1, 1, 1, 0, 0, 0}); } TEST_F(EltwiseOpTest, 
GPUSimpleTensorVector) { SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, {1, 2, 3}, {2, 4, 6, 5, 7, 9}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, + ops::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 4, 10, 18}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, + ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + ops::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 
6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {0.2, 0.4, 0.6, 0.8, 1, 1.2}, {0.1, 0.1}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, + ops::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, + ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, + ops::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); SimpleTensorEltwise( - kernels::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 
ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); } TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); SimpleTensorEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {0.2, 0.4, 0.6, 0.8, 1, 1.2}, {0.1, 0.1}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, + ops::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, + ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + ops::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, + ops::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } namespace { template -void 
RandomTensorScalar(const kernels::EltwiseType type, +void RandomTensorScalar(const ops::EltwiseType type, const std::vector &shape) { // Construct graph OpsTestNet net; @@ -501,7 +502,7 @@ void RandomTensorScalar(const kernels::EltwiseType type, expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg") .AddIntArg("type", static_cast(type)) @@ -514,7 +515,7 @@ void RandomTensorScalar(const kernels::EltwiseType type, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImg", "GPUOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*expected, *net.GetOutput("GPUOutput"), 1e-5); @@ -524,7 +525,7 @@ void RandomTensorScalar(const kernels::EltwiseType type, } template -void RandomTensorEltwise(const kernels::EltwiseType type, +void RandomTensorEltwise(const ops::EltwiseType type, const std::vector &shape0, const std::vector &shape1, const std::vector &coeff = {}) { @@ -556,9 +557,9 @@ void RandomTensorEltwise(const kernels::EltwiseType type, expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -572,7 +573,7 @@ void RandomTensorEltwise(const kernels::EltwiseType type, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImg", "GPUOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*expected, *net.GetOutput("GPUOutput"), 1e-5); @@ -597,7 +598,7 @@ void QuantizedSum(const std::vector &shape) { OpDefBuilder("Eltwise", "EltwiseTest") 
.Input("TInput0") .Input("TInput1") - .AddIntArg("type", static_cast(kernels::EltwiseType::SUM)) + .AddIntArg("type", static_cast(ops::EltwiseType::SUM)) .AddIntArg("data_format", DataFormat::NCHW) .Output("TOutput") .Finalize(net.NewOperatorDef()); @@ -638,7 +639,7 @@ void QuantizedSum(const std::vector &shape) { .Input("QuantizedInput0") .Input("QuantizedInput1") .Output("QuantizedOutput") - .AddIntArg("type", static_cast(kernels::EltwiseType::SUM)) + .AddIntArg("type", static_cast(ops::EltwiseType::SUM)) .AddIntArg("T", static_cast(DT_UINT8)) .Finalize(net.NewOperatorDef()); net.Setup(DeviceType::CPU); @@ -663,159 +664,159 @@ void QuantizedSum(const std::vector &shape) { } // namespace TEST_F(EltwiseOpTest, RandomTensorScalarFloat) { - RandomTensorScalar(kernels::EltwiseType::SUM, {1, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::SUB, {3, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::PROD, {1, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::DIV, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::MIN, {1, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::MAX, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::NEG, {1, 32, 32, 32}); - RandomTensorScalar(kernels::EltwiseType::ABS, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::SUM, {1, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::SUB, {3, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::PROD, {1, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::DIV, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::MIN, {1, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::MAX, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::NEG, {1, 32, 32, 32}); + RandomTensorScalar(ops::EltwiseType::ABS, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); } TEST_F(EltwiseOpTest, RandomTensorScalarHalf) { - 
RandomTensorScalar(kernels::EltwiseType::SUM, {1, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::SUB, {3, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::PROD, {1, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::DIV, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::MIN, {1, 32, 32, 16}); - RandomTensorScalar(kernels::EltwiseType::MAX, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::NEG, {1, 32, 32, 32}); - RandomTensorScalar(kernels::EltwiseType::ABS, {3, 31, 37, 17}); - RandomTensorScalar(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::SUM, {1, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::SUB, {3, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::PROD, {1, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::DIV, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::MIN, {1, 32, 32, 16}); + RandomTensorScalar(ops::EltwiseType::MAX, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::NEG, {1, 32, 32, 32}); + RandomTensorScalar(ops::EltwiseType::ABS, {3, 31, 37, 17}); + RandomTensorScalar(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); } TEST_F(EltwiseOpTest, RandomTensorVecFloat) { - RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUM, {1, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {5, 32, 32, 16}, {5, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {5, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 1, 1, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {5, 1, 1, 16}, {5, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::PROD, {1, 31, 37, 17}, {1, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, + 
RandomTensorEltwise(ops::EltwiseType::PROD, {1, 1, 1, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {3, 1, 1, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {3, 1, 1, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 1, 1, 16}, + RandomTensorEltwise(ops::EltwiseType::MIN, {1, 1, 1, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::MAX, {5, 31, 37, 17}, {5, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::SQR_DIFF, {5, 31, 37, 17}, {5, 1, 1, 17}); } TEST_F(EltwiseOpTest, RandomTensorVecHalf) { - RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUM, {1, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {3, 32, 32, 16}, {3, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {3, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 1, 1, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {3, 1, 1, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, + RandomTensorEltwise(ops::EltwiseType::PROD, {1, 1, 1, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {5, 31, 37, 17}, {5, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {5, 31, 37, 17}, {1, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 1, 1, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {5, 1, 1, 17}, {5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 1, 1, 16}, + RandomTensorEltwise(ops::EltwiseType::MIN, {1, 1, 1, 16}, {1, 32, 32, 16}); - 
RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::MAX, {3, 31, 37, 17}, {3, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, {3, 1, 1, 17}); } TEST_F(EltwiseOpTest, RandomTensorTensorFloat) { - RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUM, {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {3, 32, 32, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::PROD, {1, 31, 37, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {5, 31, 37, 17}, {5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::MIN, {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::MAX, {3, 31, 37, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, {3, 31, 37, 17}); } TEST_F(EltwiseOpTest, RandomTensorTensorHalf) { - RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUM, {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::SUB, {3, 32, 32, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::PROD, {1, 31, 37, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::DIV, {5, 31, 37, 17}, 
{5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 32, 32, 16}, + RandomTensorEltwise(ops::EltwiseType::MIN, {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::MAX, {3, 31, 37, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + RandomTensorEltwise(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, {3, 31, 37, 17}); } TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 2, 2, 3, 4}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 8, 10, 12}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, 
{1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 0, 0, 0, 0, 0}); } TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) { TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {2, 3, 4, 6, 7, 8}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::SUB, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 2, 2, 3, 4}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::PROD, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 8, 10, 12}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, + ops::EltwiseType::MAX, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}); TensorGeneralBroadcastEltwise( - kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + ops::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, {1, 2}, {1, 1, 2, 3}, {0, 1, 4, 4, 9, 16}); } diff --git a/mace/kernels/expand_dims.cc b/mace/ops/expand_dims.cc similarity index 98% rename 
from mace/kernels/expand_dims.cc rename to mace/ops/expand_dims.cc index 5dc58436..a912e0c9 100644 --- a/mace/kernels/expand_dims.cc +++ b/mace/ops/expand_dims.cc @@ -16,7 +16,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class ExpandDimsOp; @@ -67,5 +67,5 @@ void RegisterExpandDims(OpRegistryBase *op_registry) { DeviceType::CPU, uint8_t); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/expand_dims_test.cc b/mace/ops/expand_dims_test.cc index ac3312ea..35acb4f1 100644 --- a/mace/ops/expand_dims_test.cc +++ b/mace/ops/expand_dims_test.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "gmock/gmock.h" -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/fill.cc b/mace/ops/fill.cc similarity index 97% rename from mace/kernels/fill.cc rename to mace/ops/fill.cc index 0cd20930..b554c003 100644 --- a/mace/kernels/fill.cc +++ b/mace/ops/fill.cc @@ -16,7 +16,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class FillOp; @@ -66,5 +66,5 @@ void RegisterFill(OpRegistryBase *op_registry) { DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/fill_test.cc b/mace/ops/fill_test.cc index 8ecbed5d..5fde9968 100644 --- a/mace/ops/fill_test.cc +++ b/mace/ops/fill_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/fixpoint.h b/mace/ops/fixpoint.h similarity index 92% rename from mace/kernels/fixpoint.h rename to mace/ops/fixpoint.h index 47f0a8d8..1d0ef0b9 100644 --- a/mace/kernels/fixpoint.h +++ b/mace/ops/fixpoint.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_FIXPOINT_H_ -#define MACE_KERNELS_FIXPOINT_H_ +#ifndef MACE_OPS_FIXPOINT_H_ +#define MACE_OPS_FIXPOINT_H_ #if defined(MACE_ENABLE_NEON) #include @@ -23,7 +23,7 @@ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { inline uint8_t FindMax(const uint8_t *xs, const index_t size) { uint8_t max_value = 0; @@ -57,8 +57,8 @@ inline uint8_t FindMax(const uint8_t *xs, const index_t size) { } -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_FIXPOINT_H_ +#endif // MACE_OPS_FIXPOINT_H_ diff --git a/mace/kernels/fixpoint_test.cc b/mace/ops/fixpoint_test.cc similarity index 94% rename from mace/kernels/fixpoint_test.cc rename to mace/ops/fixpoint_test.cc index 8b926cd9..83189695 100644 --- a/mace/kernels/fixpoint_test.cc +++ b/mace/ops/fixpoint_test.cc @@ -17,10 +17,10 @@ #include #include -#include "mace/kernels/fixpoint.h" +#include "mace/ops/fixpoint.h" namespace mace { -namespace kernels { +namespace ops { namespace test { namespace { @@ -49,6 +49,6 @@ TEST(FixpointTest, FindMax) { } } // namespace test -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/folded_batch_norm_test.cc b/mace/ops/folded_batch_norm_test.cc index a19d7d77..35e69cd1 100644 --- a/mace/ops/folded_batch_norm_test.cc +++ b/mace/ops/folded_batch_norm_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -62,11 +61,11 @@ void Simple() { net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "FoldedBatchNormTest") .Input("InputImage") @@ -79,7 +78,7 @@ void Simple() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } // Check @@ -134,11 +133,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "FoldedBatchNormTest") .Input("InputImage") @@ -152,7 +151,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -196,11 +195,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + 
ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "FoldedBatchNormTest") .Input("InputImage") @@ -215,7 +214,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); } @@ -259,11 +258,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "FoldedBatchNormTest") .Input("InputImage") @@ -276,7 +275,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -320,11 +319,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "FoldedBatchNormTest") .Input("InputImage") @@ -338,7 +337,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); } 
diff --git a/mace/kernels/fully_connected.cc b/mace/ops/fully_connected.cc similarity index 96% rename from mace/kernels/fully_connected.cc rename to mace/ops/fully_connected.cc index a7b74c69..c5a8872b 100644 --- a/mace/kernels/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -19,22 +19,22 @@ #include "mace/core/future.h" #include "mace/core/operator.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/gemm.h" -#include "mace/kernels/gemmlowp_util.h" +#include "mace/ops/activation.h" +#include "mace/ops/gemm.h" +#include "mace/ops/gemmlowp_util.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/fully_connected.h" +#include "mace/ops/opencl/image/fully_connected.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class FullyConnectedOpBase : public Operation { public: explicit FullyConnectedOpBase(OpConstructContext *context) : Operation(context), - activation_(kernels::StringToActivationType( + activation_(ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP"))), relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)) {} @@ -229,5 +229,5 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/fully_connected_benchmark.cc b/mace/ops/fully_connected_benchmark.cc index 66af8792..6b75e60d 100644 --- a/mace/ops/fully_connected_benchmark.cc +++ b/mace/ops/fully_connected_benchmark.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -48,13 +47,13 @@ void FCBenchmark( .Output("Output") .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { - kernels::BufferType weight_type = kernels::BufferType::WEIGHT_WIDTH; + ops::BufferType weight_type = ops::BufferType::WEIGHT_WIDTH; BufferToImage(&net, "Weight", 
"WeightImage", weight_type); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("FullyConnected", "FullyConnectedTest") .Input("InputImage") diff --git a/mace/ops/fully_connected_test.cc b/mace/ops/fully_connected_test.cc index d075aac2..0fd98848 100644 --- a/mace/ops/fully_connected_test.cc +++ b/mace/ops/fully_connected_test.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -52,11 +51,11 @@ void Simple(const std::vector &input_shape, net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Weight", "WeightImage", - kernels::BufferType::WEIGHT_WIDTH); + ops::BufferType::WEIGHT_WIDTH); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("FullyConnected", "FullyConnectedTest") .Input("InputImage") @@ -69,7 +68,7 @@ void Simple(const std::vector &input_shape, // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { MACE_NOT_IMPLEMENTED; } @@ -160,11 +159,11 @@ void Random(const index_t batch, // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Weight", "WeightImage", - kernels::BufferType::WEIGHT_WIDTH); + ops::BufferType::WEIGHT_WIDTH); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); OpDefBuilder("FullyConnected", "FullyConnectedTest") .Input("InputImage") @@ -178,7 +177,7 @@ void Random(const index_t batch, 
net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-1); diff --git a/mace/kernels/gather.cc b/mace/ops/gather.cc similarity index 98% rename from mace/kernels/gather.cc rename to mace/ops/gather.cc index ff947e82..f8ceb543 100644 --- a/mace/kernels/gather.cc +++ b/mace/ops/gather.cc @@ -17,7 +17,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class GatherOp; @@ -100,5 +100,5 @@ void RegisterGather(OpRegistryBase *op_registry) { DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/gather_benchmark.cc b/mace/ops/gather_benchmark.cc index 8a0cd123..5e52875c 100644 --- a/mace/ops/gather_benchmark.cc +++ b/mace/ops/gather_benchmark.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/mace/ops/gather_test.cc b/mace/ops/gather_test.cc index c716b12a..2da0338b 100644 --- a/mace/ops/gather_test.cc +++ b/mace/ops/gather_test.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/gemm.cc b/mace/ops/gemm.cc similarity index 99% rename from mace/kernels/gemm.cc rename to mace/ops/gemm.cc index 5043a104..a4d2b8a8 100644 --- a/mace/kernels/gemm.cc +++ b/mace/ops/gemm.cc @@ -18,7 +18,7 @@ #include "mace/core/tensor.h" #include "mace/core/runtime/cpu/cpu_runtime.h" -#include "mace/kernels/gemm.h" +#include "mace/ops/gemm.h" /** * Gemm does fast matrix multiplications with batch. 
@@ -40,7 +40,7 @@ #endif namespace mace { -namespace kernels { +namespace ops { namespace { inline void GemmBlock(const float *A, @@ -1540,5 +1540,5 @@ void Gemv(const float *m_ptr, #endif } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/gemm.h b/mace/ops/gemm.h similarity index 94% rename from mace/kernels/gemm.h rename to mace/ops/gemm.h index 17096bf5..78f044e0 100644 --- a/mace/kernels/gemm.h +++ b/mace/ops/gemm.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_GEMM_H_ -#define MACE_KERNELS_GEMM_H_ +#ifndef MACE_OPS_GEMM_H_ +#define MACE_OPS_GEMM_H_ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include @@ -25,7 +25,7 @@ // Gemv function does fast matrix-vector multiplications with batch. namespace mace { -namespace kernels { +namespace ops { // Gemm calculates A[batch, height, K] dot B[batch, K, width] within each batch, // and output to C[batch, height, width]. 
@@ -72,7 +72,7 @@ void Transpose(const float *src, index_t stride_w, float *dst); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_GEMM_H_ +#endif // MACE_OPS_GEMM_H_ diff --git a/mace/kernels/gemm_test.cc b/mace/ops/gemm_test.cc similarity index 84% rename from mace/kernels/gemm_test.cc rename to mace/ops/gemm_test.cc index 0942247d..dcb8483b 100644 --- a/mace/kernels/gemm_test.cc +++ b/mace/ops/gemm_test.cc @@ -18,8 +18,8 @@ #include #include "mace/core/types.h" -#include "mace/kernels/gemm.h" -#include "mace/kernels/sgemm.h" +#include "mace/ops/gemm.h" +#include "mace/ops/sgemm.h" namespace mace { @@ -44,9 +44,9 @@ void GemmTest(index_t batch, [&gen, &nd] { return nd(gen); }); std::generate(B.get(), B.get() + batch * K * M, [&gen, &nd] { return nd(gen); }); - kernels::Gemm(A.get(), B.get(), batch, N, K, M, C.get(), transpose_a, + ops::Gemm(A.get(), B.get(), batch, N, K, M, C.get(), transpose_a, transpose_b); - kernels::GemmRef(A.get(), B.get(), batch, N, K, M, C_ref.get(), transpose_a, + ops::GemmRef(A.get(), B.get(), batch, N, K, M, C_ref.get(), transpose_a, transpose_b); for (int i = 0; i < batch * N * M; ++i) { @@ -66,8 +66,8 @@ void GemvTest(index_t batch, index_t N, index_t M) { std::generate(A.get(), A.get() + N * M, [&gen, &nd] { return nd(gen); }); std::generate(B.get(), B.get() + batch * M, [&gen, &nd] { return nd(gen); }); - kernels::Gemv(A.get(), B.get(), batch, M, N, C.get()); - kernels::GemvRef(A.get(), B.get(), batch, M, N, C_ref.get()); + ops::Gemv(A.get(), B.get(), batch, M, N, C.get()); + ops::GemvRef(A.get(), B.get(), batch, M, N, C_ref.get()); for (int i = 0; i < batch * N; ++i) { EXPECT_NEAR(C_ref[i], C[i], 0.1); @@ -93,48 +93,48 @@ void SGemmTest(index_t batch, [&gen, &nd] { return nd(gen); }); std::generate(B.get(), B.get() + batch * K * M, [&gen, &nd] { return nd(gen); }); - kernels::GemmRef(A.get(), B.get(), batch, N, K, M, C_ref.get(), transpose_a, + ops::GemmRef(A.get(), B.get(), batch, N, K, M, 
C_ref.get(), transpose_a, transpose_b); - kernels::MatrixMap matrix_a; - kernels::MatrixMap matrix_b; + ops::MatrixMap matrix_a; + ops::MatrixMap matrix_b; if (!transpose_a) { matrix_a = - kernels::MatrixMap(batch, + ops::MatrixMap(batch, N, K, - kernels::RowMajor, + ops::RowMajor, A.get()); } else { matrix_a = - kernels::MatrixMap(batch, + ops::MatrixMap(batch, K, N, - kernels::RowMajor, + ops::RowMajor, A.get()); matrix_a = matrix_a.transpose(); } if (!transpose_b) { matrix_b = - kernels::MatrixMap(batch, + ops::MatrixMap(batch, K, M, - kernels::RowMajor, + ops::RowMajor, B.get()); } else { matrix_b = - kernels::MatrixMap(batch, + ops::MatrixMap(batch, M, K, - kernels::RowMajor, + ops::RowMajor, B.get()); matrix_b = matrix_b.transpose(); } - kernels::MatrixMap matrix_c(batch, N, M, kernels::RowMajor, C.get()); + ops::MatrixMap matrix_c(batch, N, M, ops::RowMajor, C.get()); - kernels::SGemm sgemm; + ops::SGemm sgemm; sgemm(matrix_a, matrix_b, &matrix_c); for (int i = 0; i < N * M; ++i) { diff --git a/mace/kernels/gemmlowp_util.h b/mace/ops/gemmlowp_util.h similarity index 96% rename from mace/kernels/gemmlowp_util.h rename to mace/ops/gemmlowp_util.h index 8a0148e1..e46e6ed8 100644 --- a/mace/kernels/gemmlowp_util.h +++ b/mace/ops/gemmlowp_util.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_GEMMLOWP_UTIL_H_ -#define MACE_KERNELS_GEMMLOWP_UTIL_H_ +#ifndef MACE_OPS_GEMMLOWP_UTIL_H_ +#define MACE_OPS_GEMMLOWP_UTIL_H_ #include @@ -75,4 +75,4 @@ struct GemmlowpOutputPipeline { }; } // namespace mace -#endif // MACE_KERNELS_GEMMLOWP_UTIL_H_ +#endif // MACE_OPS_GEMMLOWP_UTIL_H_ diff --git a/mace/kernels/identity.cc b/mace/ops/identity.cc similarity index 97% rename from mace/kernels/identity.cc rename to mace/ops/identity.cc index 1fba94bd..54d92e56 100644 --- a/mace/kernels/identity.cc +++ b/mace/ops/identity.cc @@ -16,7 +16,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class IdentityOp : public Operation { @@ -46,5 +46,5 @@ void RegisterIdentity(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/identity_test.cc b/mace/ops/identity_test.cc index 1ef8848d..3787777b 100644 --- a/mace/ops/identity_test.cc +++ b/mace/ops/identity_test.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "gmock/gmock.h" -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/infer_conv2d_shape.cc b/mace/ops/infer_conv2d_shape.cc similarity index 97% rename from mace/kernels/infer_conv2d_shape.cc rename to mace/ops/infer_conv2d_shape.cc index 0e80aa61..fbd1d1b9 100644 --- a/mace/kernels/infer_conv2d_shape.cc +++ b/mace/ops/infer_conv2d_shape.cc @@ -14,10 +14,10 @@ #include "mace/core/operator.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { -namespace kernels { +namespace ops { template class InferConv2dShapeOp : public Operation { @@ -111,5 +111,5 @@ void RegisterInferConv2dShape(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/infer_conv2d_shape_test.cc b/mace/ops/infer_conv2d_shape_test.cc index 735a599c..ab48817b 100644 --- a/mace/ops/infer_conv2d_shape_test.cc +++ b/mace/ops/infer_conv2d_shape_test.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { namespace ops { diff --git a/mace/kernels/local_response_norm.cc b/mace/ops/local_response_norm.cc similarity index 98% rename from mace/kernels/local_response_norm.cc rename to mace/ops/local_response_norm.cc index 6a51ccb3..16828baa 100644 --- a/mace/kernels/local_response_norm.cc +++ b/mace/ops/local_response_norm.cc @@ -17,7 +17,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class LocalResponseNormOp; @@ -89,5 +89,5 @@ void RegisterLocalResponseNorm(OpRegistryBase *op_registry) { LocalResponseNormOp, DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/local_response_norm_benchmark.cc b/mace/ops/local_response_norm_benchmark.cc index 893b65d1..b917c495 100644 --- a/mace/ops/local_response_norm_benchmark.cc +++ b/mace/ops/local_response_norm_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/mace/ops/local_response_norm_test.cc b/mace/ops/local_response_norm_test.cc index 55adcedd..aa3780ca 100644 --- a/mace/ops/local_response_norm_test.cc +++ b/mace/ops/local_response_norm_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/lstm_cell.cc b/mace/ops/lstm_cell.cc similarity index 95% rename from mace/kernels/lstm_cell.cc rename to mace/ops/lstm_cell.cc index be7f50d9..19abafe0 100644 --- a/mace/kernels/lstm_cell.cc +++ b/mace/ops/lstm_cell.cc @@ -16,10 +16,10 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/opencl/image/lstm_cell.h" +#include "mace/ops/opencl/image/lstm_cell.h" namespace mace { -namespace kernels { +namespace ops { template class LSTMCellOp; @@ -66,5 +66,5 @@ void RegisterLSTMCell(OpRegistryBase *op_registry) { DeviceType::GPU, half); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/lstmcell_benchmark.cc b/mace/ops/lstmcell_benchmark.cc index a1972e72..b8840bba 100644 --- a/mace/ops/lstmcell_benchmark.cc +++ b/mace/ops/lstmcell_benchmark.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/lstmcell_test_util.h" #include "mace/ops/ops_test_util.h" @@ -50,15 +48,15 @@ void LSTMCell(int iters, int batch, int input_size, int hidden_units) { "PreCellCPU", forget_add, "CellCPU", "OutputCPU"); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "PreOutput", "PreOutputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Weight", "WeightImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "PreCell", "PreCellImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("LSTMCell", "LSTMCellTest") .Input("InputImage") diff --git a/mace/ops/lstmcell_test.cc b/mace/ops/lstmcell_test.cc index 5b26c677..99dea59c 100644 --- a/mace/ops/lstmcell_test.cc +++ b/mace/ops/lstmcell_test.cc @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" #include "mace/ops/lstmcell_test_util.h" #include "mace/ops/ops_test_util.h" @@ -53,15 +52,15 @@ void TestLSTMCell(const uint32_t &batch, // Run on GPU BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "PreOutput", "PreOutputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Weight", "WeightImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + ops::BufferType::ARGUMENT); BufferToImage(&net, "PreCell", "PreCellImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("LSTMCell", "LSTMCellTest") .Input("InputImage") @@ -78,9 +77,9 @@ void TestLSTMCell(const uint32_t &batch, net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ImageToBuffer(&net, "CellImage", "Cell", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); Tensor expected_cell, expected_output; diff --git a/mace/ops/lstmcell_test_util.h b/mace/ops/lstmcell_test_util.h index bbd523c9..a7d7a19d 100644 --- a/mace/ops/lstmcell_test_util.h +++ b/mace/ops/lstmcell_test_util.h @@ -17,8 +17,7 @@ #include -#include "mace/core/op_def_registry.h" -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -79,7 +78,7 @@ void LSTMCellCPU(OpsTestNet *net, .Input("InputSigmoid") .Input("NewInputTanh") .AddIntArg("T", DataTypeToEnum::v()) - .AddIntArg("type", static_cast(kernels::EltwiseType::PROD)) + .AddIntArg("type", static_cast(ops::EltwiseType::PROD)) .Output("RememberMul") .Finalize(net->AddNewOperatorDef()); @@ -87,7 +86,7 @@ void LSTMCellCPU(OpsTestNet *net, 
.Input("SplitOutput2") .AddFloatArg("scalar_input", forget_add_name) .AddIntArg("T", DataTypeToEnum::v()) - .AddIntArg("type", static_cast(kernels::EltwiseType::SUM)) + .AddIntArg("type", static_cast(ops::EltwiseType::SUM)) .Output("ForgetAdd") .Finalize(net->AddNewOperatorDef()); @@ -101,7 +100,7 @@ void LSTMCellCPU(OpsTestNet *net, .Input("ForgetSigmoid") .Input(pre_cell_name) .AddIntArg("T", DataTypeToEnum::v()) - .AddIntArg("type", static_cast(kernels::EltwiseType::PROD)) + .AddIntArg("type", static_cast(ops::EltwiseType::PROD)) .Output("ForgetMulPreCell") .Finalize(net->AddNewOperatorDef()); @@ -109,7 +108,7 @@ void LSTMCellCPU(OpsTestNet *net, .Input("RememberMul") .Input("ForgetMulPreCell") .AddIntArg("T", DataTypeToEnum::v()) - .AddIntArg("type", static_cast(kernels::EltwiseType::SUM)) + .AddIntArg("type", static_cast(ops::EltwiseType::SUM)) .Output(cell_name) .Finalize(net->AddNewOperatorDef()); @@ -129,7 +128,7 @@ void LSTMCellCPU(OpsTestNet *net, .Input("OutputSigmoid") .Input("CellTanh") .AddIntArg("T", DataTypeToEnum::v()) - .AddIntArg("type", static_cast(kernels::EltwiseType::PROD)) + .AddIntArg("type", static_cast(ops::EltwiseType::PROD)) .Output(output_name) .Finalize(net->AddNewOperatorDef()); } diff --git a/mace/kernels/matmul.cc b/mace/ops/matmul.cc similarity index 98% rename from mace/kernels/matmul.cc rename to mace/ops/matmul.cc index 8ef93a29..8608657e 100644 --- a/mace/kernels/matmul.cc +++ b/mace/ops/matmul.cc @@ -21,16 +21,16 @@ #include "mace/core/operator.h" #include "mace/core/tensor.h" -#include "mace/kernels/gemm.h" -#include "mace/kernels/gemmlowp_util.h" -#include "mace/kernels/sgemm.h" +#include "mace/ops/gemm.h" +#include "mace/ops/gemmlowp_util.h" +#include "mace/ops/sgemm.h" #include "mace/utils/utils.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/matmul.h" +#include "mace/ops/opencl/image/matmul.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class MatMulOpBase : public 
Operation { public: @@ -353,5 +353,5 @@ void RegisterMatMul(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/matmul_benchmark.cc b/mace/ops/matmul_benchmark.cc index c553e33d..2d62d86a 100644 --- a/mace/ops/matmul_benchmark.cc +++ b/mace/ops/matmul_benchmark.cc @@ -12,16 +12,278 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include +#include +#include -#include "mace/core/op_def_registry.h" +#include "public/gemmlowp.h" #include "mace/core/testing/test_benchmark.h" +#include "mace/ops/gemm.h" +#include "mace/ops/sgemm.h" #include "mace/ops/ops_test_util.h" +namespace gemmlowp { + +template +class Matrix : public MatrixMap { + public: + typedef MatrixMap Map; + typedef MatrixMap ConstMap; + typedef typename Map::Scalar Scalar; + static const MapOrder Order = tOrder; + using Map::cols_; + using Map::data_; + using Map::kOrder; + using Map::rows_; + using Map::stride_; + + public: + Matrix() : Map(nullptr, 0, 0, 0) {} + + Matrix(int rows, int cols) : Map(nullptr, 0, 0, 0) { Resize(rows, cols); } + + Matrix(const Matrix &other) : Map(nullptr, 0, 0, 0) { *this = other; } + + Matrix &operator=(const Matrix &other) { + Resize(other.rows_, other.cols_); + std::memcpy(data_, other.data_, size() * sizeof(Scalar)); + return *this; + } + // memcmp counts bytes, not elements, so scale by sizeof(Scalar). + friend bool operator==(const Matrix &a, const Matrix &b) { + return a.rows_ == b.rows_ && a.cols_ == b.cols_ && + !std::memcmp(a.data_, b.data_, a.size() * sizeof(Scalar)); + } + + void Resize(int rows, int cols) { + rows_ = rows; + cols_ = cols; + stride_ = kOrder == gemmlowp::MapOrder::ColMajor ? 
rows : cols; + storage.resize(size()); + data_ = storage.data(); + } + + int size() const { return rows_ * cols_; } + + Map &map() { return *static_cast(this); } + + ConstMap const_map() const { return ConstMap(data_, rows_, cols_, stride_); } + + protected: + std::vector storage; +}; + +template +void MakeZero(MatrixType *m) { + for (int c = 0; c < m->cols(); c++) { + for (int r = 0; r < m->rows(); r++) { + (*m)(r, c) = 128; + } + } +} + +} // namespace gemmlowp + namespace mace { namespace ops { namespace test { +// Benchmark raw matmul backends: MACE Gemm/SGemm, Eigen and gemmlowp + +namespace { + +// Matmul with (m, k) x (k, n) +void MatmulBenchmark_Mace(int iters, int m, int k, int n) { + mace::testing::StopTiming(); + std::vector lhs(m * k); + std::vector rhs(k * n); + std::vector result(m * n); + // warm up + Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data()); + mace::testing::StartTiming(); + while (iters--) { + Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data()); + } +} + +void MatmulBenchmark_Mace_SGemm(int iters, int m, int k, int n) { + mace::testing::StopTiming(); + std::vector lhs(m * k); + std::vector rhs(k * n); + std::vector result(m * n); + + ops::MatrixMap matrix_lhs(1, m, k, RowMajor, lhs.data(), + true); + ops::MatrixMap matrix_rhs(1, k, n, RowMajor, rhs.data(), + true); + ops::MatrixMap matrix_result(1, m, n, RowMajor, result.data()); + + ops::SGemm sgemm; + + sgemm(matrix_lhs, matrix_rhs, &matrix_result); + + mace::testing::StartTiming(); + while (iters--) { + sgemm(matrix_lhs, matrix_rhs, &matrix_result); + } +} + +void MatmulBenchmark_Eigen(int iters, int m, int k, int n) { + mace::testing::StopTiming(); + Eigen::MatrixXf lhs = Eigen::MatrixXf::Random(m, k); + Eigen::MatrixXf rhs = Eigen::MatrixXf::Random(k, n); + Eigen::MatrixXf result = Eigen::MatrixXf::Zero(m, n); + // warm up + result = lhs * rhs; + mace::testing::StartTiming(); + while (iters--) { + result = lhs * rhs; + } +} + +void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, 
int depth, int cols) { + mace::testing::StopTiming(); + + gemmlowp::Matrix lhs; + gemmlowp::Matrix rhs; + gemmlowp::Matrix result; + lhs.Resize(rows, depth); + rhs.Resize(depth, cols); + result.Resize(rows, cols); + gemmlowp::MakeZero(&lhs); + gemmlowp::MakeZero(&rhs); + gemmlowp::MakeZero(&result); + + gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage; + quantize_down_stage.result_offset_after_shift = 128; + quantize_down_stage.result_fixedpoint_multiplier = 1234567890; + quantize_down_stage.result_shift = 16; + gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; + const auto output_pipeline = + std::make_tuple(quantize_down_stage, saturating_cast_stage); + + auto gemm_context = + mace::ops::test::OpTestContext::Get() + ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); + + using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; + + gemmlowp::GemmWithOutputPipeline( + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + -128, output_pipeline); + + mace::testing::StartTiming(); + while (iters--) { + gemmlowp::GemmWithOutputPipeline( + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + -128, output_pipeline); + } +} + +void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { + mace::testing::StopTiming(); + + gemmlowp::Matrix lhs; + gemmlowp::Matrix rhs; + gemmlowp::Matrix result; + lhs.Resize(rows, depth); + rhs.Resize(depth, cols); + result.Resize(rows, cols); + gemmlowp::MakeZero(&lhs); + gemmlowp::MakeZero(&rhs); + gemmlowp::MakeZero(&result); + + const auto output_pipeline = std::make_tuple(); + + auto gemm_context = + mace::ops::test::OpTestContext::Get() + ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); + + using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; + + gemmlowp::GemmWithOutputPipeline( + gemm_context, lhs.const_map(), rhs.const_map(), 
&result.map(), -128, + -128, output_pipeline); + + mace::testing::StartTiming(); + while (iters--) { + gemmlowp::GemmWithOutputPipeline( + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + -128, output_pipeline); + } +} + +} // namespace + +#define MACE_BM_MATMUL_FUNC(M, K, N, FUNC, TYPE) \ + static void MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC(int iters) { \ + const int64_t macc = static_cast(iters) * M * K * N; \ + const int64_t tot = static_cast(iters) * (M + N) * K; \ + mace::testing::MaccProcessed(macc); \ + mace::testing::BytesProcessed(tot * sizeof(TYPE)); \ + MatmulBenchmark_##FUNC(iters, M, K, N); \ + } \ + MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC) + +#define MACE_BM_MATMUL(M, K, N) \ + MACE_BM_MATMUL_FUNC(M, K, N, Mace, float); \ + MACE_BM_MATMUL_FUNC(M, K, N, Mace_SGemm, float); \ + MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \ + MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \ + MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t); + +// Embedding size 384 +MACE_BM_MATMUL(7, 384, 384); +MACE_BM_MATMUL(7, 384, 1536); +MACE_BM_MATMUL(7, 1536, 384); + +MACE_BM_MATMUL(15, 384, 384); +MACE_BM_MATMUL(15, 384, 1536); +MACE_BM_MATMUL(15, 1536, 384); + +MACE_BM_MATMUL(1, 256, 256); +MACE_BM_MATMUL(1, 256, 1536); +MACE_BM_MATMUL(1, 1536, 256); +MACE_BM_MATMUL(256, 256, 1); +MACE_BM_MATMUL(1536, 256, 1); +MACE_BM_MATMUL(256, 1536, 1); +MACE_BM_MATMUL(29792, 256, 1); +MACE_BM_MATMUL(1, 256, 29792); +MACE_BM_MATMUL(2, 256, 256); +MACE_BM_MATMUL(2, 256, 1536); +MACE_BM_MATMUL(2, 1536, 256); +MACE_BM_MATMUL(3, 256, 256); +MACE_BM_MATMUL(3, 256, 1536); +MACE_BM_MATMUL(3, 1536, 256); +MACE_BM_MATMUL(4, 256, 256); +MACE_BM_MATMUL(4, 256, 1536); +MACE_BM_MATMUL(4, 1536, 256); +MACE_BM_MATMUL(8, 256, 256); +MACE_BM_MATMUL(8, 256, 1536); +MACE_BM_MATMUL(8, 1536, 256); +MACE_BM_MATMUL(10, 256, 256); +MACE_BM_MATMUL(10, 256, 1536); +MACE_BM_MATMUL(10, 1536, 256); +MACE_BM_MATMUL(15, 256, 256); +MACE_BM_MATMUL(15, 256, 1536); 
+MACE_BM_MATMUL(15, 1536, 256); + +// Embedding size 128 +MACE_BM_MATMUL(1, 128, 1536); +MACE_BM_MATMUL(1, 128, 44678); + +// MobileNet +MACE_BM_MATMUL(128, 128, 3136); +MACE_BM_MATMUL(256, 256, 784); +MACE_BM_MATMUL(512, 512, 196); +MACE_BM_MATMUL(1024, 1024, 49); + namespace { template void MatMulBenchmark( @@ -41,9 +303,9 @@ void MatMulBenchmark( } if (D == DeviceType::GPU) { BufferToImage(&net, "A", "AImage", - kernels::BufferType::IN_OUT_WIDTH); + ops::BufferType::IN_OUT_WIDTH); BufferToImage(&net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulBM") .Input("AImage") @@ -137,7 +399,7 @@ void MatMulTransposeBenchmark( } \ MACE_BENCHMARK(MACE_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) -#define MACE_BM_MATMUL(N, H, C, W) \ +#define MACE_BM_MATMUL_OP(N, H, C, W) \ MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU); \ MACE_BM_MATMUL_MACRO(N, H, C, W, float, GPU); \ MACE_BM_MATMUL_MACRO(N, H, C, W, half, GPU); \ @@ -158,17 +420,17 @@ void MatMulTransposeBenchmark( MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU); -MACE_BM_MATMUL(1, 128, 128, 49); -MACE_BM_MATMUL(2, 128, 128, 49); -MACE_BM_MATMUL(3, 128, 128, 49); -MACE_BM_MATMUL(4, 128, 128, 49); -MACE_BM_MATMUL(16, 32, 128, 49); -MACE_BM_MATMUL(16, 32, 128, 961); -MACE_BM_MATMUL(16, 32, 128, 3969); -MACE_BM_MATMUL(16, 128, 128, 49); -MACE_BM_MATMUL(16, 49, 128, 128); -MACE_BM_MATMUL(16, 128, 128, 961); -MACE_BM_MATMUL(16, 128, 128, 3969); +MACE_BM_MATMUL_OP(1, 128, 128, 49); +MACE_BM_MATMUL_OP(2, 128, 128, 49); +MACE_BM_MATMUL_OP(3, 128, 128, 49); +MACE_BM_MATMUL_OP(4, 128, 128, 49); +MACE_BM_MATMUL_OP(16, 32, 128, 49); +MACE_BM_MATMUL_OP(16, 32, 128, 961); +MACE_BM_MATMUL_OP(16, 32, 128, 3969); +MACE_BM_MATMUL_OP(16, 128, 128, 49); +MACE_BM_MATMUL_OP(16, 49, 128, 128); +MACE_BM_MATMUL_OP(16, 128, 128, 961); +MACE_BM_MATMUL_OP(16, 128, 128, 3969); MACE_BM_MATMUL_TRANPOSE(16, 
32, 128, 49); MACE_BM_MATMUL_TRANPOSE(16, 32, 128, 961); diff --git a/mace/ops/matmul_test.cc b/mace/ops/matmul_test.cc index e31d8616..d2d95874 100644 --- a/mace/ops/matmul_test.cc +++ b/mace/ops/matmul_test.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -39,9 +38,9 @@ void Simple(const std::vector &A_shape, if (D == DeviceType::GPU) { BufferToImage(&net, "A", "AImage", - kernels::BufferType::IN_OUT_WIDTH); + ops::BufferType::IN_OUT_WIDTH); BufferToImage(&net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulTest") .Input("AImage") @@ -53,7 +52,7 @@ void Simple(const std::vector &A_shape, // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); } else { OpDefBuilder("MatMul", "MatMulTest") .Input("A") @@ -130,9 +129,9 @@ void Complex(const std::vector &batch, // Run on opencl BufferToImage(&net, "A", "AImage", - kernels::BufferType::IN_OUT_WIDTH); + ops::BufferType::IN_OUT_WIDTH); BufferToImage(&net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulTest") .Input("AImage") @@ -144,7 +143,7 @@ void Complex(const std::vector &batch, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); // run cpu std::vector shape_a = batch; diff --git a/mace/kernels/memory_benchmark.cc b/mace/ops/memory_benchmark.cc similarity index 98% rename from mace/kernels/memory_benchmark.cc rename to mace/ops/memory_benchmark.cc index 5d9ab1f4..e3bb30a8 100644 --- a/mace/kernels/memory_benchmark.cc +++ b/mace/ops/memory_benchmark.cc @@ -19,7 +19,7 @@ #include "mace/core/testing/test_benchmark.h" namespace mace { -namespace kernels { +namespace ops { namespace test { // Test the speed of different 
access order of a NHWC buffer @@ -107,5 +107,5 @@ MACE_BM_MEMORY_ACCESS(10, 64, 1024, 64, NHCW); MACE_BM_MEMORY_ACCESS(10, 64, 1024, 64, NWCH); } // namespace test -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/activation.h b/mace/ops/opencl/activation.h similarity index 85% rename from mace/kernels/opencl/activation.h rename to mace/ops/opencl/activation.h index 35f1785c..7a20ff16 100644 --- a/mace/kernels/opencl/activation.h +++ b/mace/ops/opencl/activation.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_ACTIVATION_H_ -#define MACE_KERNELS_OPENCL_ACTIVATION_H_ +#ifndef MACE_OPS_OPENCL_ACTIVATION_H_ +#define MACE_OPS_OPENCL_ACTIVATION_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLActivationKernel { public: virtual MaceStatus Compute( @@ -34,7 +34,7 @@ class OpenCLActivationKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLActivationKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_ACTIVATION_H_ +#endif // MACE_OPS_OPENCL_ACTIVATION_H_ diff --git a/mace/kernels/opencl/addn.h b/mace/ops/opencl/addn.h similarity index 86% rename from mace/kernels/opencl/addn.h rename to mace/ops/opencl/addn.h index 908ff113..12e8888c 100644 --- a/mace/kernels/opencl/addn.h +++ b/mace/ops/opencl/addn.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_ADDN_H_ -#define MACE_KERNELS_OPENCL_ADDN_H_ +#ifndef MACE_OPS_OPENCL_ADDN_H_ +#define MACE_OPS_OPENCL_ADDN_H_ #include @@ -25,7 +25,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLAddNKernel { public: @@ -36,7 +36,7 @@ class OpenCLAddNKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLAddNKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_ADDN_H_ +#endif // MACE_OPS_OPENCL_ADDN_H_ diff --git a/mace/kernels/opencl/batch_norm.h b/mace/ops/opencl/batch_norm.h similarity index 85% rename from mace/kernels/opencl/batch_norm.h rename to mace/ops/opencl/batch_norm.h index b97dfe6e..da3353d2 100644 --- a/mace/kernels/opencl/batch_norm.h +++ b/mace/ops/opencl/batch_norm.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_BATCH_NORM_H_ -#define MACE_KERNELS_OPENCL_BATCH_NORM_H_ +#ifndef MACE_OPS_OPENCL_BATCH_NORM_H_ +#define MACE_OPS_OPENCL_BATCH_NORM_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLBatchNormKernel { public: virtual MaceStatus Compute( @@ -37,7 +37,7 @@ class OpenCLBatchNormKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBatchNormKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BATCH_NORM_H_ +#endif // MACE_OPS_OPENCL_BATCH_NORM_H_ diff --git a/mace/kernels/opencl/batch_to_space.h b/mace/ops/opencl/batch_to_space.h similarity index 86% rename from mace/kernels/opencl/batch_to_space.h rename to mace/ops/opencl/batch_to_space.h index 9f155336..d07e79ee 100644 --- a/mace/kernels/opencl/batch_to_space.h +++ b/mace/ops/opencl/batch_to_space.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations 
under the License. -#ifndef MACE_KERNELS_OPENCL_BATCH_TO_SPACE_H_ -#define MACE_KERNELS_OPENCL_BATCH_TO_SPACE_H_ +#ifndef MACE_OPS_OPENCL_BATCH_TO_SPACE_H_ +#define MACE_OPS_OPENCL_BATCH_TO_SPACE_H_ #include @@ -26,7 +26,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLBatchToSpaceKernel { public: virtual MaceStatus Compute( @@ -39,7 +39,7 @@ class OpenCLBatchToSpaceKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBatchToSpaceKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BATCH_TO_SPACE_H_ +#endif // MACE_OPS_OPENCL_BATCH_TO_SPACE_H_ diff --git a/mace/kernels/opencl/bias_add.h b/mace/ops/opencl/bias_add.h similarity index 85% rename from mace/kernels/opencl/bias_add.h rename to mace/ops/opencl/bias_add.h index 1a0a1050..67e5b3aa 100644 --- a/mace/kernels/opencl/bias_add.h +++ b/mace/ops/opencl/bias_add.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BIAS_ADD_H_ -#define MACE_KERNELS_OPENCL_BIAS_ADD_H_ +#ifndef MACE_OPS_OPENCL_BIAS_ADD_H_ +#define MACE_OPS_OPENCL_BIAS_ADD_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLBiasAddKernel { public: virtual MaceStatus Compute( @@ -34,7 +34,7 @@ class OpenCLBiasAddKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBiasAddKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BIAS_ADD_H_ +#endif // MACE_OPS_OPENCL_BIAS_ADD_H_ diff --git a/mace/kernels/opencl/buffer/buffer_inverse_transform.h b/mace/ops/opencl/buffer/buffer_inverse_transform.h similarity index 85% rename from mace/kernels/opencl/buffer/buffer_inverse_transform.h rename to mace/ops/opencl/buffer/buffer_inverse_transform.h index 29e63143..647f2514 100644 --- a/mace/kernels/opencl/buffer/buffer_inverse_transform.h +++ b/mace/ops/opencl/buffer/buffer_inverse_transform.h @@ -12,17 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ +#define MACE_OPS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ -#include "mace/kernels/opencl/buffer_inverse_transform.h" +#include "mace/ops/opencl/buffer_inverse_transform.h" #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -65,7 +65,7 @@ MaceStatus BufferInverseTransform::Compute(OpContext *context, } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_ diff --git a/mace/kernels/opencl/buffer/buffer_transform.cc b/mace/ops/opencl/buffer/buffer_transform.cc similarity index 98% rename from mace/kernels/opencl/buffer/buffer_transform.cc rename to mace/ops/opencl/buffer/buffer_transform.cc index 7e5897a5..29f467e2 100644 --- a/mace/kernels/opencl/buffer/buffer_transform.cc +++ b/mace/ops/opencl/buffer/buffer_transform.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/opencl/buffer/buffer_transform.h" +#include "mace/ops/opencl/buffer/buffer_transform.h" #include #include #include namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -237,5 +237,5 @@ MaceStatus TransformArgument( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/buffer_transform.h b/mace/ops/opencl/buffer/buffer_transform.h similarity index 90% rename from mace/kernels/opencl/buffer/buffer_transform.h rename to mace/ops/opencl/buffer/buffer_transform.h index 4a2213e4..4919bb09 100644 --- a/mace/kernels/opencl/buffer/buffer_transform.h +++ b/mace/ops/opencl/buffer/buffer_transform.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ +#define MACE_OPS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ -#include "mace/kernels/opencl/buffer_transform.h" +#include "mace/ops/opencl/buffer_transform.h" #include #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -101,7 +101,7 @@ MaceStatus BufferTransform::Compute(OpContext *context, } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_ diff --git a/mace/kernels/opencl/buffer/buffer_type_transform.cc b/mace/ops/opencl/buffer/buffer_type_transform.cc similarity index 97% rename from mace/kernels/opencl/buffer/buffer_type_transform.cc rename to mace/ops/opencl/buffer/buffer_type_transform.cc index 
4f78f83a..d1d52fe4 100644 --- a/mace/kernels/opencl/buffer/buffer_type_transform.cc +++ b/mace/ops/opencl/buffer/buffer_type_transform.cc @@ -15,10 +15,10 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -93,5 +93,5 @@ MaceStatus BufferTypeTransform( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/conv_2d.h b/mace/ops/opencl/buffer/conv_2d.h similarity index 95% rename from mace/kernels/opencl/buffer/conv_2d.h rename to mace/ops/opencl/buffer/conv_2d.h index 8e7ee8b5..e50d8e5c 100644 --- a/mace/kernels/opencl/buffer/conv_2d.h +++ b/mace/ops/opencl/buffer/conv_2d.h @@ -11,20 +11,20 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_CONV_2D_H_ -#define MACE_KERNELS_OPENCL_BUFFER_CONV_2D_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_CONV_2D_H_ +#define MACE_OPS_OPENCL_BUFFER_CONV_2D_H_ -#include "mace/kernels/opencl/conv_2d.h" +#include "mace/ops/opencl/conv_2d.h" #include #include #include -#include "mace/kernels/opencl/buffer/utils.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/buffer/utils.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { namespace conv2d { @@ -102,7 +102,7 @@ MaceStatus Conv2dKernel::Compute( std::vector output_shape(4); std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), filter->shape().data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -211,7 +211,7 @@ MaceStatus Conv2dKernel::Compute( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_CONV_2D_H_ +#endif // MACE_OPS_OPENCL_BUFFER_CONV_2D_H_ diff --git a/mace/kernels/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc similarity index 97% rename from mace/kernels/opencl/buffer/conv_2d_1x1.cc rename to mace/ops/opencl/buffer/conv_2d_1x1.cc index cbe12466..62e77b17 100644 --- a/mace/kernels/opencl/buffer/conv_2d_1x1.cc +++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc @@ -14,11 +14,11 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { namespace conv2d { @@ -122,5 +122,5 @@ MaceStatus Conv2d1x1(OpContext *context, } // namespace conv2d } // namespace buffer } // namespace 
opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc similarity index 97% rename from mace/kernels/opencl/buffer/conv_2d_general.cc rename to mace/ops/opencl/buffer/conv_2d_general.cc index 17506a8b..f9cc804d 100644 --- a/mace/kernels/opencl/buffer/conv_2d_general.cc +++ b/mace/ops/opencl/buffer/conv_2d_general.cc @@ -14,11 +14,11 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { namespace conv2d { @@ -136,5 +136,5 @@ MaceStatus Conv2dGeneral(OpContext *context, } // namespace conv2d } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/depthwise_conv2d.cc b/mace/ops/opencl/buffer/depthwise_conv2d.cc similarity index 98% rename from mace/kernels/opencl/buffer/depthwise_conv2d.cc rename to mace/ops/opencl/buffer/depthwise_conv2d.cc index 9ff27690..0ba4526c 100644 --- a/mace/kernels/opencl/buffer/depthwise_conv2d.cc +++ b/mace/ops/opencl/buffer/depthwise_conv2d.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/opencl/buffer/depthwise_conv2d.h" +#include "mace/ops/opencl/buffer/depthwise_conv2d.h" #include #include namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { namespace depthwise { @@ -133,5 +133,5 @@ MaceStatus DepthwiseConv2d(OpContext *context, } // namespace depthwise } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/depthwise_conv2d.h b/mace/ops/opencl/buffer/depthwise_conv2d.h similarity index 94% rename from mace/kernels/opencl/buffer/depthwise_conv2d.h rename to mace/ops/opencl/buffer/depthwise_conv2d.h index b5e26c40..2d6ce0c8 100644 --- a/mace/kernels/opencl/buffer/depthwise_conv2d.h +++ b/mace/ops/opencl/buffer/depthwise_conv2d.h @@ -11,20 +11,20 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ -#define MACE_KERNELS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ +#define MACE_OPS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ -#include "mace/kernels/opencl/depthwise_conv2d.h" +#include "mace/ops/opencl/depthwise_conv2d.h" #include #include #include -#include "mace/kernels/opencl/buffer/utils.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/buffer/utils.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { namespace depthwise { @@ -95,7 +95,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( std::vector output_shape(4); std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), fake_filter_shape.data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -182,7 +182,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ +#endif // MACE_OPS_OPENCL_BUFFER_DEPTHWISE_CONV2D_H_ diff --git a/mace/kernels/opencl/buffer/pooling.h b/mace/ops/opencl/buffer/pooling.h similarity index 95% rename from mace/kernels/opencl/buffer/pooling.h rename to mace/ops/opencl/buffer/pooling.h index a4433d13..4684d687 100644 --- a/mace/kernels/opencl/buffer/pooling.h +++ b/mace/ops/opencl/buffer/pooling.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_POOLING_H_ -#define MACE_KERNELS_OPENCL_BUFFER_POOLING_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_POOLING_H_ +#define MACE_OPS_OPENCL_BUFFER_POOLING_H_ -#include "mace/kernels/opencl/pooling.h" +#include "mace/ops/opencl/pooling.h" #include #include @@ -22,11 +22,11 @@ #include #include -#include "mace/kernels/opencl/buffer/utils.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/buffer/utils.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -76,7 +76,7 @@ MaceStatus PoolingKernel::Compute( std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), filter_shape.data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -205,7 +205,7 @@ MaceStatus PoolingKernel::Compute( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_POOLING_H_ +#endif // MACE_OPS_OPENCL_BUFFER_POOLING_H_ diff --git a/mace/kernels/opencl/buffer/softmax.h b/mace/ops/opencl/buffer/softmax.h similarity index 93% rename from mace/kernels/opencl/buffer/softmax.h rename to mace/ops/opencl/buffer/softmax.h index 502899d8..3147a935 100644 --- a/mace/kernels/opencl/buffer/softmax.h +++ b/mace/ops/opencl/buffer/softmax.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_SOFTMAX_H_ -#define MACE_KERNELS_OPENCL_BUFFER_SOFTMAX_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_SOFTMAX_H_ +#define MACE_OPS_OPENCL_BUFFER_SOFTMAX_H_ -#include "mace/kernels/opencl/softmax.h" +#include "mace/ops/opencl/softmax.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { template @@ -119,7 +119,7 @@ MaceStatus SoftmaxKernel::Compute( } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_SOFTMAX_H_ +#endif // MACE_OPS_OPENCL_BUFFER_SOFTMAX_H_ diff --git a/mace/kernels/opencl/buffer/utils.cc b/mace/ops/opencl/buffer/utils.cc similarity index 96% rename from mace/kernels/opencl/buffer/utils.cc rename to mace/ops/opencl/buffer/utils.cc index a6d5502a..b4214a0a 100644 --- a/mace/kernels/opencl/buffer/utils.cc +++ b/mace/ops/opencl/buffer/utils.cc @@ -12,17 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/opencl/buffer/utils.h" +#include "mace/ops/opencl/buffer/utils.h" #include #include #include #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -93,5 +93,5 @@ MaceStatus PadInput(OpContext *context, } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/buffer/utils.h b/mace/ops/opencl/buffer/utils.h similarity index 86% rename from mace/kernels/opencl/buffer/utils.h rename to mace/ops/opencl/buffer/utils.h index f783691f..c1df8acc 100644 --- a/mace/kernels/opencl/buffer/utils.h +++ b/mace/ops/opencl/buffer/utils.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_BUFFER_UTILS_H_ -#define MACE_KERNELS_OPENCL_BUFFER_UTILS_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_UTILS_H_ +#define MACE_OPS_OPENCL_BUFFER_UTILS_H_ #include "mace/core/future.h" #include "mace/core/op_context.h" @@ -21,7 +21,7 @@ #include "mace/public/mace.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace buffer { @@ -36,6 +36,6 @@ MaceStatus PadInput(OpContext *context, } // namespace buffer } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_UTILS_H_ +#endif // MACE_OPS_OPENCL_BUFFER_UTILS_H_ diff --git a/mace/kernels/opencl/buffer_inverse_transform.h b/mace/ops/opencl/buffer_inverse_transform.h similarity index 81% rename from mace/kernels/opencl/buffer_inverse_transform.h rename to mace/ops/opencl/buffer_inverse_transform.h index 0c785910..7f52a64f 100644 --- a/mace/kernels/opencl/buffer_inverse_transform.h +++ b/mace/ops/opencl/buffer_inverse_transform.h @@ -12,10 +12,10 @@ // See the License for the specific language 
governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ +#define MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ -#include "mace/kernels/opencl/common.h" +#include "mace/ops/opencl/common.h" #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -24,7 +24,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLBufferInverseTransformKernel { public: virtual MaceStatus Compute(OpContext *context, @@ -35,7 +35,7 @@ class OpenCLBufferInverseTransformKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBufferInverseTransformKernel) }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_ diff --git a/mace/kernels/opencl/buffer_transform.h b/mace/ops/opencl/buffer_transform.h similarity index 82% rename from mace/kernels/opencl/buffer_transform.h rename to mace/ops/opencl/buffer_transform.h index cc53ef77..45808d40 100644 --- a/mace/kernels/opencl/buffer_transform.h +++ b/mace/ops/opencl/buffer_transform.h @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_BUFFER_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_BUFFER_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_ +#define MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_ -#include "mace/kernels/opencl/common.h" +#include "mace/ops/opencl/common.h" #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -24,7 +24,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLBufferTransformKernel { public: virtual MaceStatus Compute(OpContext *context, @@ -35,7 +35,7 @@ class OpenCLBufferTransformKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBufferTransformKernel) }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_BUFFER_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_ diff --git a/mace/kernels/opencl/channel_shuffle.h b/mace/ops/opencl/channel_shuffle.h similarity index 83% rename from mace/kernels/opencl/channel_shuffle.h rename to mace/ops/opencl/channel_shuffle.h index 5a5da027..e6243fcd 100644 --- a/mace/kernels/opencl/channel_shuffle.h +++ b/mace/ops/opencl/channel_shuffle.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_CHANNEL_SHUFFLE_H_ -#define MACE_KERNELS_OPENCL_CHANNEL_SHUFFLE_H_ +#ifndef MACE_OPS_OPENCL_CHANNEL_SHUFFLE_H_ +#define MACE_OPS_OPENCL_CHANNEL_SHUFFLE_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLChannelShuffleKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLChannelShuffleKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLChannelShuffleKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_CHANNEL_SHUFFLE_H_ +#endif // MACE_OPS_OPENCL_CHANNEL_SHUFFLE_H_ diff --git a/mace/kernels/opencl/cl/activation.cl b/mace/ops/opencl/cl/activation.cl similarity index 100% rename from mace/kernels/opencl/cl/activation.cl rename to mace/ops/opencl/cl/activation.cl diff --git a/mace/kernels/opencl/cl/addn.cl b/mace/ops/opencl/cl/addn.cl similarity index 100% rename from mace/kernels/opencl/cl/addn.cl rename to mace/ops/opencl/cl/addn.cl diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/ops/opencl/cl/batch_norm.cl similarity index 100% rename from mace/kernels/opencl/cl/batch_norm.cl rename to mace/ops/opencl/cl/batch_norm.cl diff --git a/mace/kernels/opencl/cl/batch_to_space.cl b/mace/ops/opencl/cl/batch_to_space.cl similarity index 100% rename from mace/kernels/opencl/cl/batch_to_space.cl rename to mace/ops/opencl/cl/batch_to_space.cl diff --git a/mace/kernels/opencl/cl/bias_add.cl b/mace/ops/opencl/cl/bias_add.cl similarity index 100% rename from mace/kernels/opencl/cl/bias_add.cl rename to mace/ops/opencl/cl/bias_add.cl diff --git a/mace/kernels/opencl/cl/buffer_to_image.cl b/mace/ops/opencl/cl/buffer_to_image.cl similarity index 100% rename from mace/kernels/opencl/cl/buffer_to_image.cl rename to mace/ops/opencl/cl/buffer_to_image.cl diff --git a/mace/kernels/opencl/cl/buffer_transform.cl b/mace/ops/opencl/cl/buffer_transform.cl similarity 
index 100% rename from mace/kernels/opencl/cl/buffer_transform.cl rename to mace/ops/opencl/cl/buffer_transform.cl diff --git a/mace/kernels/opencl/cl/channel_shuffle.cl b/mace/ops/opencl/cl/channel_shuffle.cl similarity index 100% rename from mace/kernels/opencl/cl/channel_shuffle.cl rename to mace/ops/opencl/cl/channel_shuffle.cl diff --git a/mace/kernels/opencl/cl/common.h b/mace/ops/opencl/cl/common.h similarity index 96% rename from mace/kernels/opencl/cl/common.h rename to mace/ops/opencl/cl/common.h index abfdd978..069130d4 100644 --- a/mace/kernels/opencl/cl/common.h +++ b/mace/ops/opencl/cl/common.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_CL_COMMON_H_ -#define MACE_KERNELS_OPENCL_CL_COMMON_H_ +#ifndef MACE_OPS_OPENCL_CL_COMMON_H_ +#define MACE_OPS_OPENCL_CL_COMMON_H_ #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -126,4 +126,4 @@ inline void check_out_of_range_for_buffer(__private const int length, } -#endif // MACE_KERNELS_OPENCL_CL_COMMON_H_ +#endif // MACE_OPS_OPENCL_CL_COMMON_H_ diff --git a/mace/kernels/opencl/cl/concat.cl b/mace/ops/opencl/cl/concat.cl similarity index 100% rename from mace/kernels/opencl/cl/concat.cl rename to mace/ops/opencl/cl/concat.cl diff --git a/mace/kernels/opencl/cl/conv_2d.cl b/mace/ops/opencl/cl/conv_2d.cl similarity index 100% rename from mace/kernels/opencl/cl/conv_2d.cl rename to mace/ops/opencl/cl/conv_2d.cl diff --git a/mace/kernels/opencl/cl/conv_2d_1x1.cl b/mace/ops/opencl/cl/conv_2d_1x1.cl similarity index 100% rename from mace/kernels/opencl/cl/conv_2d_1x1.cl rename to mace/ops/opencl/cl/conv_2d_1x1.cl diff --git a/mace/kernels/opencl/cl/conv_2d_1x1_buffer.cl b/mace/ops/opencl/cl/conv_2d_1x1_buffer.cl similarity index 100% rename from mace/kernels/opencl/cl/conv_2d_1x1_buffer.cl rename to mace/ops/opencl/cl/conv_2d_1x1_buffer.cl diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl 
b/mace/ops/opencl/cl/conv_2d_3x3.cl similarity index 100% rename from mace/kernels/opencl/cl/conv_2d_3x3.cl rename to mace/ops/opencl/cl/conv_2d_3x3.cl diff --git a/mace/kernels/opencl/cl/conv_2d_buffer.cl b/mace/ops/opencl/cl/conv_2d_buffer.cl similarity index 100% rename from mace/kernels/opencl/cl/conv_2d_buffer.cl rename to mace/ops/opencl/cl/conv_2d_buffer.cl diff --git a/mace/kernels/opencl/cl/crop.cl b/mace/ops/opencl/cl/crop.cl similarity index 100% rename from mace/kernels/opencl/cl/crop.cl rename to mace/ops/opencl/cl/crop.cl diff --git a/mace/kernels/opencl/cl/deconv_2d.cl b/mace/ops/opencl/cl/deconv_2d.cl similarity index 100% rename from mace/kernels/opencl/cl/deconv_2d.cl rename to mace/ops/opencl/cl/deconv_2d.cl diff --git a/mace/kernels/opencl/cl/depth_to_space.cl b/mace/ops/opencl/cl/depth_to_space.cl similarity index 100% rename from mace/kernels/opencl/cl/depth_to_space.cl rename to mace/ops/opencl/cl/depth_to_space.cl diff --git a/mace/kernels/opencl/cl/depthwise_conv2d.cl b/mace/ops/opencl/cl/depthwise_conv2d.cl similarity index 100% rename from mace/kernels/opencl/cl/depthwise_conv2d.cl rename to mace/ops/opencl/cl/depthwise_conv2d.cl diff --git a/mace/kernels/opencl/cl/depthwise_conv2d_buffer.cl b/mace/ops/opencl/cl/depthwise_conv2d_buffer.cl similarity index 100% rename from mace/kernels/opencl/cl/depthwise_conv2d_buffer.cl rename to mace/ops/opencl/cl/depthwise_conv2d_buffer.cl diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/ops/opencl/cl/eltwise.cl similarity index 100% rename from mace/kernels/opencl/cl/eltwise.cl rename to mace/ops/opencl/cl/eltwise.cl diff --git a/mace/kernels/opencl/cl/fully_connected.cl b/mace/ops/opencl/cl/fully_connected.cl similarity index 100% rename from mace/kernels/opencl/cl/fully_connected.cl rename to mace/ops/opencl/cl/fully_connected.cl diff --git a/mace/kernels/opencl/cl/lstmcell.cl b/mace/ops/opencl/cl/lstmcell.cl similarity index 100% rename from mace/kernels/opencl/cl/lstmcell.cl rename to 
mace/ops/opencl/cl/lstmcell.cl diff --git a/mace/kernels/opencl/cl/matmul.cl b/mace/ops/opencl/cl/matmul.cl similarity index 100% rename from mace/kernels/opencl/cl/matmul.cl rename to mace/ops/opencl/cl/matmul.cl diff --git a/mace/kernels/opencl/cl/pad.cl b/mace/ops/opencl/cl/pad.cl similarity index 100% rename from mace/kernels/opencl/cl/pad.cl rename to mace/ops/opencl/cl/pad.cl diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/ops/opencl/cl/pooling.cl similarity index 100% rename from mace/kernels/opencl/cl/pooling.cl rename to mace/ops/opencl/cl/pooling.cl diff --git a/mace/kernels/opencl/cl/pooling_buffer.cl b/mace/ops/opencl/cl/pooling_buffer.cl similarity index 100% rename from mace/kernels/opencl/cl/pooling_buffer.cl rename to mace/ops/opencl/cl/pooling_buffer.cl diff --git a/mace/kernels/opencl/cl/reduce_mean.cl b/mace/ops/opencl/cl/reduce_mean.cl similarity index 100% rename from mace/kernels/opencl/cl/reduce_mean.cl rename to mace/ops/opencl/cl/reduce_mean.cl diff --git a/mace/kernels/opencl/cl/resize_bicubic.cl b/mace/ops/opencl/cl/resize_bicubic.cl similarity index 100% rename from mace/kernels/opencl/cl/resize_bicubic.cl rename to mace/ops/opencl/cl/resize_bicubic.cl diff --git a/mace/kernels/opencl/cl/resize_bilinear.cl b/mace/ops/opencl/cl/resize_bilinear.cl similarity index 100% rename from mace/kernels/opencl/cl/resize_bilinear.cl rename to mace/ops/opencl/cl/resize_bilinear.cl diff --git a/mace/kernels/opencl/cl/softmax.cl b/mace/ops/opencl/cl/softmax.cl similarity index 100% rename from mace/kernels/opencl/cl/softmax.cl rename to mace/ops/opencl/cl/softmax.cl diff --git a/mace/kernels/opencl/cl/softmax_buffer.cl b/mace/ops/opencl/cl/softmax_buffer.cl similarity index 100% rename from mace/kernels/opencl/cl/softmax_buffer.cl rename to mace/ops/opencl/cl/softmax_buffer.cl diff --git a/mace/kernels/opencl/cl/space_to_batch.cl b/mace/ops/opencl/cl/space_to_batch.cl similarity index 100% rename from mace/kernels/opencl/cl/space_to_batch.cl 
rename to mace/ops/opencl/cl/space_to_batch.cl diff --git a/mace/kernels/opencl/cl/space_to_depth.cl b/mace/ops/opencl/cl/space_to_depth.cl similarity index 100% rename from mace/kernels/opencl/cl/space_to_depth.cl rename to mace/ops/opencl/cl/space_to_depth.cl diff --git a/mace/kernels/opencl/cl/split.cl b/mace/ops/opencl/cl/split.cl similarity index 100% rename from mace/kernels/opencl/cl/split.cl rename to mace/ops/opencl/cl/split.cl diff --git a/mace/kernels/opencl/cl/sqrdiff_mean.cl b/mace/ops/opencl/cl/sqrdiff_mean.cl similarity index 100% rename from mace/kernels/opencl/cl/sqrdiff_mean.cl rename to mace/ops/opencl/cl/sqrdiff_mean.cl diff --git a/mace/kernels/opencl/cl/winograd_transform.cl b/mace/ops/opencl/cl/winograd_transform.cl similarity index 100% rename from mace/kernels/opencl/cl/winograd_transform.cl rename to mace/ops/opencl/cl/winograd_transform.cl diff --git a/mace/kernels/opencl/common.h b/mace/ops/opencl/common.h similarity index 84% rename from mace/kernels/opencl/common.h rename to mace/ops/opencl/common.h index 176f58ed..0a238960 100644 --- a/mace/kernels/opencl/common.h +++ b/mace/ops/opencl/common.h @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_COMMON_H_ -#define MACE_KERNELS_OPENCL_COMMON_H_ +#ifndef MACE_OPS_OPENCL_COMMON_H_ +#define MACE_OPS_OPENCL_COMMON_H_ namespace mace { -namespace kernels { +namespace ops { enum BufferType { CONV2D_FILTER = 0, @@ -30,6 +30,6 @@ enum BufferType { WEIGHT_WIDTH = 8, }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_COMMON_H_ +#endif // MACE_OPS_OPENCL_COMMON_H_ diff --git a/mace/kernels/opencl/concat.h b/mace/ops/opencl/concat.h similarity index 86% rename from mace/kernels/opencl/concat.h rename to mace/ops/opencl/concat.h index 78ef14d9..f6b83434 100644 --- a/mace/kernels/opencl/concat.h +++ b/mace/ops/opencl/concat.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_CONCAT_H_ -#define MACE_KERNELS_OPENCL_CONCAT_H_ +#ifndef MACE_OPS_OPENCL_CONCAT_H_ +#define MACE_OPS_OPENCL_CONCAT_H_ #include @@ -25,7 +25,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLConcatKernel { public: virtual MaceStatus Compute( @@ -35,7 +35,7 @@ class OpenCLConcatKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_CONCAT_H_ +#endif // MACE_OPS_OPENCL_CONCAT_H_ diff --git a/mace/kernels/opencl/conv_2d.h b/mace/ops/opencl/conv_2d.h similarity index 82% rename from mace/kernels/opencl/conv_2d.h rename to mace/ops/opencl/conv_2d.h index d5ff40ed..cf0911f7 100644 --- a/mace/kernels/opencl/conv_2d.h +++ b/mace/ops/opencl/conv_2d.h @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_CONV_2D_H_ -#define MACE_KERNELS_OPENCL_CONV_2D_H_ +#ifndef MACE_OPS_OPENCL_CONV_2D_H_ +#define MACE_OPS_OPENCL_CONV_2D_H_ #include -#include "mace/kernels/activation.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/activation.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { class OpContext; -namespace kernels { +namespace ops { class OpenCLConv2dKernel { public: virtual MaceStatus Compute( @@ -41,7 +41,7 @@ class OpenCLConv2dKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_CONV_2D_H_ +#endif // MACE_OPS_OPENCL_CONV_2D_H_ diff --git a/mace/kernels/opencl/crop.h b/mace/ops/opencl/crop.h similarity index 86% rename from mace/kernels/opencl/crop.h rename to mace/ops/opencl/crop.h index d59f67f5..f46f4f12 100644 --- a/mace/kernels/opencl/crop.h +++ b/mace/ops/opencl/crop.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_CROP_H_ -#define MACE_KERNELS_OPENCL_CROP_H_ +#ifndef MACE_OPS_OPENCL_CROP_H_ +#define MACE_OPS_OPENCL_CROP_H_ #include @@ -25,7 +25,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLCropKernel { public: virtual MaceStatus Compute( @@ -35,7 +35,7 @@ class OpenCLCropKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLCropKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_CROP_H_ +#endif // MACE_OPS_OPENCL_CROP_H_ diff --git a/mace/kernels/opencl/deconv_2d.h b/mace/ops/opencl/deconv_2d.h similarity index 84% rename from mace/kernels/opencl/deconv_2d.h rename to mace/ops/opencl/deconv_2d.h index c601acfe..69bc6f97 100644 --- a/mace/kernels/opencl/deconv_2d.h +++ b/mace/ops/opencl/deconv_2d.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_DECONV_2D_H_ -#define MACE_KERNELS_OPENCL_DECONV_2D_H_ +#ifndef MACE_OPS_OPENCL_DECONV_2D_H_ +#define MACE_OPS_OPENCL_DECONV_2D_H_ #include -#include "mace/kernels/activation.h" +#include "mace/ops/activation.h" namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLDeconv2dKernel { public: virtual MaceStatus Compute( @@ -40,7 +40,7 @@ class OpenCLDeconv2dKernel { Tensor *output) = 0; MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_DECONV_2D_H_ +#endif // MACE_OPS_OPENCL_DECONV_2D_H_ diff --git a/mace/kernels/opencl/depth_to_space.h b/mace/ops/opencl/depth_to_space.h similarity index 83% rename from mace/kernels/opencl/depth_to_space.h rename to mace/ops/opencl/depth_to_space.h index 02585911..c5fce383 100644 --- a/mace/kernels/opencl/depth_to_space.h +++ b/mace/ops/opencl/depth_to_space.h @@ -12,8 +12,8 @@ // See the License for the specific language 
governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_DEPTH_TO_SPACE_H_ -#define MACE_KERNELS_OPENCL_DEPTH_TO_SPACE_H_ +#ifndef MACE_OPS_OPENCL_DEPTH_TO_SPACE_H_ +#define MACE_OPS_OPENCL_DEPTH_TO_SPACE_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -22,7 +22,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLDepthToSpaceKernel { public: @@ -33,7 +33,7 @@ class OpenCLDepthToSpaceKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthToSpaceKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_DEPTH_TO_SPACE_H_ +#endif // MACE_OPS_OPENCL_DEPTH_TO_SPACE_H_ diff --git a/mace/kernels/opencl/depthwise_conv2d.h b/mace/ops/opencl/depthwise_conv2d.h similarity index 81% rename from mace/kernels/opencl/depthwise_conv2d.h rename to mace/ops/opencl/depthwise_conv2d.h index 24d08a20..b993e120 100644 --- a/mace/kernels/opencl/depthwise_conv2d.h +++ b/mace/ops/opencl/depthwise_conv2d.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_DEPTHWISE_CONV2D_H_ -#define MACE_KERNELS_OPENCL_DEPTHWISE_CONV2D_H_ +#ifndef MACE_OPS_OPENCL_DEPTHWISE_CONV2D_H_ +#define MACE_OPS_OPENCL_DEPTHWISE_CONV2D_H_ #include -#include "mace/kernels/activation.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/activation.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { class OpContext; -namespace kernels { +namespace ops { class OpenCLDepthwiseConv2dKernel { public: virtual MaceStatus Compute( @@ -42,7 +42,7 @@ class OpenCLDepthwiseConv2dKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_DEPTHWISE_CONV2D_H_ +#endif // MACE_OPS_OPENCL_DEPTHWISE_CONV2D_H_ diff --git a/mace/kernels/opencl/eltwise.h b/mace/ops/opencl/eltwise.h similarity index 85% rename from mace/kernels/opencl/eltwise.h rename to mace/ops/opencl/eltwise.h index 83a94feb..abd38045 100644 --- a/mace/kernels/opencl/eltwise.h +++ b/mace/ops/opencl/eltwise.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_ELTWISE_H_ -#define MACE_KERNELS_OPENCL_ELTWISE_H_ +#ifndef MACE_OPS_OPENCL_ELTWISE_H_ +#define MACE_OPS_OPENCL_ELTWISE_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLEltwiseKernel { public: virtual MaceStatus Compute( @@ -34,7 +34,7 @@ class OpenCLEltwiseKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLEltwiseKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_ELTWISE_H_ +#endif // MACE_OPS_OPENCL_ELTWISE_H_ diff --git a/mace/kernels/opencl/fully_connected.h b/mace/ops/opencl/fully_connected.h similarity index 82% rename from mace/kernels/opencl/fully_connected.h rename to mace/ops/opencl/fully_connected.h index 7982d468..952c5b9c 100644 --- a/mace/kernels/opencl/fully_connected.h +++ b/mace/ops/opencl/fully_connected.h @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_FULLY_CONNECTED_H_ -#define MACE_KERNELS_OPENCL_FULLY_CONNECTED_H_ +#ifndef MACE_OPS_OPENCL_FULLY_CONNECTED_H_ +#define MACE_OPS_OPENCL_FULLY_CONNECTED_H_ -#include "mace/kernels/activation.h" +#include "mace/ops/activation.h" #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -25,7 +25,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLFullyConnectedKernel { public: virtual MaceStatus Compute( @@ -39,7 +39,7 @@ class OpenCLFullyConnectedKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_FULLY_CONNECTED_H_ +#endif // MACE_OPS_OPENCL_FULLY_CONNECTED_H_ diff --git a/mace/kernels/opencl/helper.cc b/mace/ops/opencl/helper.cc similarity index 99% rename from mace/kernels/opencl/helper.cc rename to mace/ops/opencl/helper.cc index aa3daadb..7eb392a8 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/ops/opencl/helper.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" #include #include @@ -22,7 +22,7 @@ #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { namespace { // [(C + 3) / 4 * W, N * H] @@ -489,5 +489,5 @@ MaceStatus TuningOrRun2DKernel(OpenCLRuntime *runtime, return MaceStatus::MACE_SUCCESS; } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/helper.h b/mace/ops/opencl/helper.h similarity index 85% rename from mace/kernels/opencl/helper.h rename to mace/ops/opencl/helper.h index 0126d2ac..d4b5aa51 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/ops/opencl/helper.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_HELPER_H_ -#define MACE_KERNELS_OPENCL_HELPER_H_ +#ifndef MACE_OPS_OPENCL_HELPER_H_ +#define MACE_OPS_OPENCL_HELPER_H_ #include #include @@ -25,30 +25,30 @@ #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/types.h" -#include "mace/kernels/opencl/common.h" +#include "mace/ops/opencl/common.h" #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { // oorc for 'Out Of Range Check' #define MACE_OUT_OF_RANGE_DEFINITION \ std::shared_ptr oorc_flag; -#define MACE_OUT_OF_RANGE_CONFIG \ - if (runtime->IsOutOfRangeCheckEnabled()) { \ - built_options.emplace("-DOUT_OF_RANGE_CHECK"); \ +#define MACE_OUT_OF_RANGE_CONFIG \ + if (runtime->IsOutOfRangeCheckEnabled()) { \ + built_options.emplace("-DOUT_OF_RANGE_CHECK"); \ } -#define MACE_OUT_OF_RANGE_INIT(kernel) \ - if (runtime->IsOutOfRangeCheckEnabled()) { \ - oorc_flag = std::move(std::unique_ptr( \ - new Buffer((context)->device()->allocator()))); \ - MACE_RETURN_IF_ERROR((oorc_flag)->Allocate(sizeof(int))); \ - oorc_flag->Map(nullptr); \ - *(oorc_flag->mutable_data()) = 0; \ - oorc_flag->UnMap(); \ - (kernel).setArg(0, \ - *(static_cast(oorc_flag->buffer())));\ +#define MACE_OUT_OF_RANGE_INIT(kernel) \ + if (runtime->IsOutOfRangeCheckEnabled()) { \ + oorc_flag = std::move(std::unique_ptr( \ + new Buffer((context)->device()->allocator()))); \ + MACE_RETURN_IF_ERROR((oorc_flag)->Allocate(sizeof(int)));\ + oorc_flag->Map(nullptr); \ + *(oorc_flag->mutable_data()) = 0; \ + oorc_flag->UnMap(); \ + (kernel).setArg(0, \ + *(static_cast(oorc_flag->buffer()))); \ } #define MACE_OUT_OF_RANGE_SET_ARGS(kernel) \ @@ -67,7 +67,7 @@ namespace kernels { #define MACE_OUT_OF_RANGE_VALIDATION \ if (runtime->IsOutOfRangeCheckEnabled()) { \ oorc_flag->Map(nullptr); \ - int *kerror_code = oorc_flag->mutable_data(); \ + int *kerror_code = oorc_flag->mutable_data(); \ MACE_CHECK(*kerror_code == 0, "Kernel error code: ", 
*kerror_code); \ oorc_flag->UnMap(); \ } @@ -173,6 +173,6 @@ std::vector Default3DLocalWS(OpenCLRuntime *runtime, const uint32_t *gws, const uint32_t kwg_size); -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_HELPER_H_ +#endif // MACE_OPS_OPENCL_HELPER_H_ diff --git a/mace/kernels/opencl/image/activation.h b/mace/ops/opencl/image/activation.h similarity index 93% rename from mace/kernels/opencl/image/activation.h rename to mace/ops/opencl/image/activation.h index b1633076..93944b5b 100644 --- a/mace/kernels/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_ACTIVATION_H_ -#define MACE_KERNELS_OPENCL_IMAGE_ACTIVATION_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_ACTIVATION_H_ +#define MACE_OPS_OPENCL_IMAGE_ACTIVATION_H_ -#include "mace/kernels/opencl/activation.h" +#include "mace/ops/opencl/activation.h" #include #include @@ -23,11 +23,11 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -142,7 +142,7 @@ MaceStatus ActivationKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_ACTIVATION_H_ +#endif // MACE_OPS_OPENCL_IMAGE_ACTIVATION_H_ diff --git a/mace/kernels/opencl/image/addn.h b/mace/ops/opencl/image/addn.h similarity index 94% rename from mace/kernels/opencl/image/addn.h rename to mace/ops/opencl/image/addn.h index 8f50d140..bde9c6b0 100644 --- a/mace/kernels/opencl/image/addn.h +++ b/mace/ops/opencl/image/addn.h @@ 
-11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_ADDN_H_ -#define MACE_KERNELS_OPENCL_IMAGE_ADDN_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_ADDN_H_ +#define MACE_OPS_OPENCL_IMAGE_ADDN_H_ -#include "mace/kernels/opencl/addn.h" +#include "mace/ops/opencl/addn.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -129,7 +129,7 @@ MaceStatus AddNKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_ADDN_H_ +#endif // MACE_OPS_OPENCL_IMAGE_ADDN_H_ diff --git a/mace/kernels/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h similarity index 94% rename from mace/kernels/opencl/image/batch_norm.h rename to mace/ops/opencl/image/batch_norm.h index 9414f28b..5685c514 100644 --- a/mace/kernels/opencl/image/batch_norm.h +++ b/mace/ops/opencl/image/batch_norm.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_BATCH_NORM_H_ -#define MACE_KERNELS_OPENCL_IMAGE_BATCH_NORM_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_BATCH_NORM_H_ +#define MACE_OPS_OPENCL_IMAGE_BATCH_NORM_H_ -#include "mace/kernels/opencl/batch_norm.h" +#include "mace/ops/opencl/batch_norm.h" #include #include @@ -23,11 +23,11 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -156,7 +156,7 @@ MaceStatus BatchNormKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_BATCH_NORM_H_ +#endif // MACE_OPS_OPENCL_IMAGE_BATCH_NORM_H_ diff --git a/mace/kernels/opencl/image/batch_to_space.h b/mace/ops/opencl/image/batch_to_space.h similarity index 93% rename from mace/kernels/opencl/image/batch_to_space.h rename to mace/ops/opencl/image/batch_to_space.h index 8d984270..b92c9a4e 100644 --- a/mace/kernels/opencl/image/batch_to_space.h +++ b/mace/ops/opencl/image/batch_to_space.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ -#define MACE_KERNELS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ +#define MACE_OPS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ -#include "mace/kernels/opencl/batch_to_space.h" +#include "mace/ops/opencl/batch_to_space.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -124,7 +124,7 @@ MaceStatus BatchToSpaceKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ +#endif // MACE_OPS_OPENCL_IMAGE_BATCH_TO_SPACE_H_ diff --git a/mace/kernels/opencl/image/bias_add.h b/mace/ops/opencl/image/bias_add.h similarity index 93% rename from mace/kernels/opencl/image/bias_add.h rename to mace/ops/opencl/image/bias_add.h index 2180df11..25e2392e 100644 --- a/mace/kernels/opencl/image/bias_add.h +++ b/mace/ops/opencl/image/bias_add.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_BIAS_ADD_H_ -#define MACE_KERNELS_OPENCL_IMAGE_BIAS_ADD_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_BIAS_ADD_H_ +#define MACE_OPS_OPENCL_IMAGE_BIAS_ADD_H_ -#include "mace/kernels/opencl/bias_add.h" +#include "mace/ops/opencl/bias_add.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -125,7 +125,7 @@ MaceStatus BiasAddKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_BIAS_ADD_H_ +#endif // MACE_OPS_OPENCL_IMAGE_BIAS_ADD_H_ diff --git a/mace/kernels/opencl/image/buffer_to_image.h b/mace/ops/opencl/image/buffer_to_image.h similarity index 95% rename from mace/kernels/opencl/image/buffer_to_image.h rename to mace/ops/opencl/image/buffer_to_image.h index 208c33fa..64319721 100644 --- a/mace/kernels/opencl/image/buffer_to_image.h +++ b/mace/ops/opencl/image/buffer_to_image.h @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ -#define MACE_KERNELS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ +#define MACE_OPS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ -#include "mace/kernels/opencl/buffer_transform.h" +#include "mace/ops/opencl/buffer_transform.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -204,7 +204,7 @@ MaceStatus BufferToImage::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ +#endif // MACE_OPS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_ diff --git a/mace/kernels/opencl/image/channel_shuffle.h b/mace/ops/opencl/image/channel_shuffle.h similarity index 93% rename from mace/kernels/opencl/image/channel_shuffle.h rename to mace/ops/opencl/image/channel_shuffle.h index 8d351c0a..53acbf15 100644 --- a/mace/kernels/opencl/image/channel_shuffle.h +++ b/mace/ops/opencl/image/channel_shuffle.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ -#define MACE_KERNELS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ +#define MACE_OPS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ -#include "mace/kernels/opencl/channel_shuffle.h" +#include "mace/ops/opencl/channel_shuffle.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -115,7 +115,7 @@ MaceStatus ChannelShuffleKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ +#endif // MACE_OPS_OPENCL_IMAGE_CHANNEL_SHUFFLE_H_ diff --git a/mace/kernels/opencl/image/concat.cc b/mace/ops/opencl/image/concat.cc similarity index 98% rename from mace/kernels/opencl/image/concat.cc rename to mace/ops/opencl/image/concat.cc index 9fc6dd48..aab72c54 100644 --- a/mace/kernels/opencl/image/concat.cc +++ b/mace/ops/opencl/image/concat.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/opencl/image/concat.h" +#include "mace/ops/opencl/image/concat.h" #include #include #include namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace concat { @@ -207,5 +207,5 @@ MaceStatus ConcatN(OpContext *context, } // namespace concat } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/image/concat.h b/mace/ops/opencl/image/concat.h similarity index 93% rename from mace/kernels/opencl/image/concat.h rename to mace/ops/opencl/image/concat.h index 4041cc3e..f12ad25c 100644 --- a/mace/kernels/opencl/image/concat.h +++ b/mace/ops/opencl/image/concat.h @@ -11,20 +11,20 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_CONCAT_H_ -#define MACE_KERNELS_OPENCL_IMAGE_CONCAT_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_CONCAT_H_ +#define MACE_OPS_OPENCL_IMAGE_CONCAT_H_ -#include "mace/kernels/opencl/concat.h" +#include "mace/ops/opencl/concat.h" #include #include #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace concat { @@ -114,7 +114,7 @@ MaceStatus ConcatKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_CONCAT_H_ +#endif // MACE_OPS_OPENCL_IMAGE_CONCAT_H_ diff --git a/mace/kernels/opencl/image/conv_2d.h b/mace/ops/opencl/image/conv_2d.h similarity index 95% rename from mace/kernels/opencl/image/conv_2d.h rename to mace/ops/opencl/image/conv_2d.h index 415beac4..224432e8 100644 --- a/mace/kernels/opencl/image/conv_2d.h +++ b/mace/ops/opencl/image/conv_2d.h @@ 
-11,20 +11,20 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_CONV_2D_H_ -#define MACE_KERNELS_OPENCL_IMAGE_CONV_2D_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_CONV_2D_H_ +#define MACE_OPS_OPENCL_IMAGE_CONV_2D_H_ -#include "mace/kernels/opencl/conv_2d.h" +#include "mace/ops/opencl/conv_2d.h" #include #include #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -137,7 +137,7 @@ MaceStatus Conv2dKernel::Compute( std::vector output_shape(4); std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), filter->shape().data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -170,7 +170,7 @@ MaceStatus Conv2dKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_CONV_2D_H_ +#endif // MACE_OPS_OPENCL_IMAGE_CONV_2D_H_ diff --git a/mace/kernels/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc similarity index 98% rename from mace/kernels/opencl/image/conv_2d_1x1.cc rename to mace/ops/opencl/image/conv_2d_1x1.cc index 36f8ba34..74a7ddc9 100644 --- a/mace/kernels/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -14,11 +14,11 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -176,5 
+176,5 @@ extern MaceStatus Conv2dOpenclK1x1(OpContext *context, } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc similarity index 98% rename from mace/kernels/opencl/image/conv_2d_3x3.cc rename to mace/ops/opencl/image/conv_2d_3x3.cc index f2f94c03..42a2a81e 100644 --- a/mace/kernels/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -14,12 +14,12 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/opencl/helper.h" #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -163,5 +163,5 @@ extern MaceStatus Conv2dOpenclK3x3(OpContext *context, } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc similarity index 98% rename from mace/kernels/opencl/image/conv_2d_general.cc rename to mace/ops/opencl/image/conv_2d_general.cc index 8221814e..9b577c2b 100644 --- a/mace/kernels/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -14,12 +14,12 @@ #include "mace/core/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/opencl/helper.h" -#include "mace/kernels/activation.h" +#include "mace/ops/opencl/helper.h" +#include "mace/ops/activation.h" #include "mace/utils/utils.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -176,5 +176,5 @@ extern MaceStatus Conv2dOpencl(OpContext *context, } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git 
a/mace/kernels/opencl/image/crop.h b/mace/ops/opencl/image/crop.h similarity index 96% rename from mace/kernels/opencl/image/crop.h rename to mace/ops/opencl/image/crop.h index 7ab8ce1c..95a989a1 100644 --- a/mace/kernels/opencl/image/crop.h +++ b/mace/ops/opencl/image/crop.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_CROP_H_ -#define MACE_KERNELS_OPENCL_IMAGE_CROP_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_CROP_H_ +#define MACE_OPS_OPENCL_IMAGE_CROP_H_ -#include "mace/kernels/opencl/crop.h" +#include "mace/ops/opencl/crop.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -188,7 +188,7 @@ MaceStatus CropKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_CROP_H_ +#endif // MACE_OPS_OPENCL_IMAGE_CROP_H_ diff --git a/mace/kernels/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h similarity index 96% rename from mace/kernels/opencl/image/deconv_2d.h rename to mace/ops/opencl/image/deconv_2d.h index eae5978a..c4dfa2bf 100644 --- a/mace/kernels/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_DECONV_2D_H_ -#define MACE_KERNELS_OPENCL_IMAGE_DECONV_2D_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_DECONV_2D_H_ +#define MACE_OPS_OPENCL_IMAGE_DECONV_2D_H_ -#include "mace/kernels/opencl/deconv_2d.h" +#include "mace/ops/opencl/deconv_2d.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -182,7 +182,7 @@ MaceStatus Deconv2dKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_DECONV_2D_H_ +#endif // MACE_OPS_OPENCL_IMAGE_DECONV_2D_H_ diff --git a/mace/kernels/opencl/image/depth_to_space.h b/mace/ops/opencl/image/depth_to_space.h similarity index 94% rename from mace/kernels/opencl/image/depth_to_space.h rename to mace/ops/opencl/image/depth_to_space.h index 0a961d53..2ed253df 100644 --- a/mace/kernels/opencl/image/depth_to_space.h +++ b/mace/ops/opencl/image/depth_to_space.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ -#define MACE_KERNELS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ +#define MACE_OPS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ -#include "mace/kernels/opencl/depth_to_space.h" +#include "mace/ops/opencl/depth_to_space.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -138,7 +138,7 @@ MaceStatus DepthToSpaceKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ +#endif // MACE_OPS_OPENCL_IMAGE_DEPTH_TO_SPACE_H_ diff --git a/mace/kernels/opencl/image/depthwise_conv2d.cc b/mace/ops/opencl/image/depthwise_conv2d.cc similarity index 98% rename from mace/kernels/opencl/image/depthwise_conv2d.cc rename to mace/ops/opencl/image/depthwise_conv2d.cc index 57953960..02409ebe 100644 --- a/mace/kernels/opencl/image/depthwise_conv2d.cc +++ b/mace/ops/opencl/image/depthwise_conv2d.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/opencl/image/depthwise_conv2d.h" +#include "mace/ops/opencl/image/depthwise_conv2d.h" #include #include #include namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace depthwise { @@ -189,5 +189,5 @@ MaceStatus DepthwiseConv2d(OpContext *context, } // namespace depthwise } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/image/depthwise_conv2d.h b/mace/ops/opencl/image/depthwise_conv2d.h similarity index 92% rename from mace/kernels/opencl/image/depthwise_conv2d.h rename to mace/ops/opencl/image/depthwise_conv2d.h index 7bfa9ede..e818b039 100644 --- a/mace/kernels/opencl/image/depthwise_conv2d.h +++ b/mace/ops/opencl/image/depthwise_conv2d.h @@ -11,20 +11,20 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ -#define MACE_KERNELS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ +#define MACE_OPS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ -#include "mace/kernels/opencl/depthwise_conv2d.h" +#include "mace/ops/opencl/depthwise_conv2d.h" #include #include #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace depthwise { @@ -101,7 +101,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( std::vector output_shape(4); std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), fake_filter_shape.data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -124,7 +124,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ +#endif // MACE_OPS_OPENCL_IMAGE_DEPTHWISE_CONV2D_H_ diff --git a/mace/kernels/opencl/image/eltwise.h b/mace/ops/opencl/image/eltwise.h similarity index 95% rename from mace/kernels/opencl/image/eltwise.h rename to mace/ops/opencl/image/eltwise.h index d2352602..25235a44 100644 --- a/mace/kernels/opencl/image/eltwise.h +++ b/mace/ops/opencl/image/eltwise.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_ELTWISE_H_ -#define MACE_KERNELS_OPENCL_IMAGE_ELTWISE_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_ELTWISE_H_ +#define MACE_OPS_OPENCL_IMAGE_ELTWISE_H_ -#include "mace/kernels/opencl/eltwise.h" +#include "mace/ops/opencl/eltwise.h" #include #include @@ -24,11 +24,11 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/eltwise.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/eltwise.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -185,7 +185,7 @@ MaceStatus EltwiseKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_ELTWISE_H_ +#endif // MACE_OPS_OPENCL_IMAGE_ELTWISE_H_ diff --git a/mace/kernels/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h similarity index 95% rename from mace/kernels/opencl/image/fully_connected.h rename to mace/ops/opencl/image/fully_connected.h index 605c9ee9..2d8fbb88 100644 --- a/mace/kernels/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_FULLY_CONNECTED_H_ -#define MACE_KERNELS_OPENCL_IMAGE_FULLY_CONNECTED_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_FULLY_CONNECTED_H_ +#define MACE_OPS_OPENCL_IMAGE_FULLY_CONNECTED_H_ -#include "mace/kernels/opencl/fully_connected.h" +#include "mace/ops/opencl/fully_connected.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -184,7 +184,7 @@ MaceStatus FullyConnectedKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_FULLY_CONNECTED_H_ +#endif // MACE_OPS_OPENCL_IMAGE_FULLY_CONNECTED_H_ diff --git a/mace/kernels/opencl/image/image_to_buffer.h b/mace/ops/opencl/image/image_to_buffer.h similarity index 95% rename from mace/kernels/opencl/image/image_to_buffer.h rename to mace/ops/opencl/image/image_to_buffer.h index da8667f0..9aa65f0e 100644 --- a/mace/kernels/opencl/image/image_to_buffer.h +++ b/mace/ops/opencl/image/image_to_buffer.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ -#define MACE_KERNELS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ +#define MACE_OPS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ #include #include #include #include "mace/core/op_context.h" -#include "mace/kernels/opencl/buffer_inverse_transform.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/buffer_inverse_transform.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -185,7 +185,7 @@ MaceStatus ImageToBuffer::Compute(OpContext *context, } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ +#endif // MACE_OPS_OPENCL_IMAGE_IMAGE_TO_BUFFER_H_ diff --git a/mace/kernels/opencl/image/lstm_cell.h b/mace/ops/opencl/image/lstm_cell.h similarity index 94% rename from mace/kernels/opencl/image/lstm_cell.h rename to mace/ops/opencl/image/lstm_cell.h index 00b07356..967c4bf4 100644 --- a/mace/kernels/opencl/image/lstm_cell.h +++ b/mace/ops/opencl/image/lstm_cell.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_LSTM_CELL_H_ -#define MACE_KERNELS_OPENCL_IMAGE_LSTM_CELL_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_LSTM_CELL_H_ +#define MACE_OPS_OPENCL_IMAGE_LSTM_CELL_H_ -#include "mace/kernels/opencl/lstm_cell.h" +#include "mace/ops/opencl/lstm_cell.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -135,7 +135,7 @@ MaceStatus LSTMCellKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_LSTM_CELL_H_ +#endif // MACE_OPS_OPENCL_IMAGE_LSTM_CELL_H_ diff --git a/mace/kernels/opencl/image/matmul.h b/mace/ops/opencl/image/matmul.h similarity index 93% rename from mace/kernels/opencl/image/matmul.h rename to mace/ops/opencl/image/matmul.h index aa688646..899df5a5 100644 --- a/mace/kernels/opencl/image/matmul.h +++ b/mace/ops/opencl/image/matmul.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_MATMUL_H_ -#define MACE_KERNELS_OPENCL_IMAGE_MATMUL_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_MATMUL_H_ +#define MACE_OPS_OPENCL_IMAGE_MATMUL_H_ -#include "mace/kernels/opencl/matmul.h" +#include "mace/ops/opencl/matmul.h" #include #include @@ -24,10 +24,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -122,7 +122,7 @@ MaceStatus MatMulKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_MATMUL_H_ +#endif // MACE_OPS_OPENCL_IMAGE_MATMUL_H_ diff --git a/mace/kernels/opencl/image/pad.h b/mace/ops/opencl/image/pad.h similarity index 94% rename from mace/kernels/opencl/image/pad.h rename to mace/ops/opencl/image/pad.h index b9673e9e..c96d964a 100644 --- a/mace/kernels/opencl/image/pad.h +++ b/mace/ops/opencl/image/pad.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_PAD_H_ -#define MACE_KERNELS_OPENCL_IMAGE_PAD_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_PAD_H_ +#define MACE_OPS_OPENCL_IMAGE_PAD_H_ -#include "mace/kernels/opencl/pad.h" +#include "mace/ops/opencl/pad.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -130,7 +130,7 @@ MaceStatus PadKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_PAD_H_ +#endif // MACE_OPS_OPENCL_IMAGE_PAD_H_ diff --git a/mace/kernels/opencl/image/pooling.h b/mace/ops/opencl/image/pooling.h similarity index 95% rename from mace/kernels/opencl/image/pooling.h rename to mace/ops/opencl/image/pooling.h index 769f3cf8..1384b54b 100644 --- a/mace/kernels/opencl/image/pooling.h +++ b/mace/ops/opencl/image/pooling.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_POOLING_H_ -#define MACE_KERNELS_OPENCL_IMAGE_POOLING_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_POOLING_H_ +#define MACE_OPS_OPENCL_IMAGE_POOLING_H_ -#include "mace/kernels/opencl/pooling.h" +#include "mace/ops/opencl/pooling.h" #include #include @@ -24,10 +24,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace pooling { @@ -97,7 +97,7 @@ MaceStatus PoolingKernel::Compute( std::vector paddings(2); if (padding_data.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input->shape().data(), filter_shape.data(), dilations, strides, padding_type, output_shape.data(), paddings.data()); } else { @@ -181,7 +181,7 @@ MaceStatus PoolingKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_POOLING_H_ +#endif // MACE_OPS_OPENCL_IMAGE_POOLING_H_ diff --git a/mace/kernels/opencl/image/reduce_mean.h b/mace/ops/opencl/image/reduce_mean.h similarity index 95% rename from mace/kernels/opencl/image/reduce_mean.h rename to mace/ops/opencl/image/reduce_mean.h index 7d7c5fba..ca5daa5f 100644 --- a/mace/kernels/opencl/image/reduce_mean.h +++ b/mace/ops/opencl/image/reduce_mean.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_REDUCE_MEAN_H_ -#define MACE_KERNELS_OPENCL_IMAGE_REDUCE_MEAN_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_REDUCE_MEAN_H_ +#define MACE_OPS_OPENCL_IMAGE_REDUCE_MEAN_H_ -#include "mace/kernels/opencl/reduce_mean.h" +#include "mace/ops/opencl/reduce_mean.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -171,7 +171,7 @@ MaceStatus ReduceMeanKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_REDUCE_MEAN_H_ +#endif // MACE_OPS_OPENCL_IMAGE_REDUCE_MEAN_H_ diff --git a/mace/kernels/opencl/image/resize_bicubic.h b/mace/ops/opencl/image/resize_bicubic.h similarity index 91% rename from mace/kernels/opencl/image/resize_bicubic.h rename to mace/ops/opencl/image/resize_bicubic.h index 20d062ac..e801e59f 100644 --- a/mace/kernels/opencl/image/resize_bicubic.h +++ b/mace/ops/opencl/image/resize_bicubic.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ -#define MACE_KERNELS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ +#define MACE_OPS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ -#include "mace/kernels/opencl/resize_bicubic.h" +#include "mace/ops/opencl/resize_bicubic.h" #include #include @@ -24,11 +24,11 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" -#include "mace/kernels/resize_bicubic.h" +#include "mace/ops/opencl/helper.h" +#include "mace/ops/resize_bicubic.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace resize_bicubic { @@ -116,7 +116,7 @@ MaceStatus ResizeBicubicKernel::Compute( built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt)); built_options.emplace( MakeString("-DTABLE_SIZE=", - mace::kernels::resize_bicubic::kTableSize)); + mace::ops::resize_bicubic::kTableSize)); MACE_RETURN_IF_ERROR( runtime->BuildKernel("resize_bicubic", kernel_name, @@ -138,10 +138,10 @@ MaceStatus ResizeBicubicKernel::Compute( MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); float height_scale = - mace::kernels::resize_bicubic::CalculateResizeScale( + mace::ops::resize_bicubic::CalculateResizeScale( in_height, out_height, align_corners_); float width_scale = - mace::kernels::resize_bicubic::CalculateResizeScale( + mace::ops::resize_bicubic::CalculateResizeScale( in_width, out_width, align_corners_); uint32_t idx = 0; @@ -172,7 +172,7 @@ MaceStatus ResizeBicubicKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ +#endif // MACE_OPS_OPENCL_IMAGE_RESIZE_BICUBIC_H_ diff --git a/mace/kernels/opencl/image/resize_bilinear.h b/mace/ops/opencl/image/resize_bilinear.h similarity index 92% rename from mace/kernels/opencl/image/resize_bilinear.h rename to 
mace/ops/opencl/image/resize_bilinear.h index d34b7d50..7af9a5f6 100644 --- a/mace/kernels/opencl/image/resize_bilinear.h +++ b/mace/ops/opencl/image/resize_bilinear.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ -#define MACE_KERNELS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ +#define MACE_OPS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ -#include "mace/kernels/opencl/resize_bilinear.h" +#include "mace/ops/opencl/resize_bilinear.h" #include #include @@ -24,11 +24,11 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" -#include "mace/kernels/resize_bilinear.h" +#include "mace/ops/opencl/helper.h" +#include "mace/ops/resize_bilinear.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace resize_bilinear { @@ -139,11 +139,11 @@ MaceStatus ResizeBilinearKernel::Compute( MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); float height_scale = - mace::kernels::resize_bilinear::CalculateResizeScale(in_height, + mace::ops::resize_bilinear::CalculateResizeScale(in_height, out_height, align_corners_); float width_scale = - mace::kernels::resize_bilinear::CalculateResizeScale(in_width, + mace::ops::resize_bilinear::CalculateResizeScale(in_width, out_width, align_corners_); @@ -175,7 +175,7 @@ MaceStatus ResizeBilinearKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ +#endif // MACE_OPS_OPENCL_IMAGE_RESIZE_BILINEAR_H_ diff --git a/mace/kernels/opencl/image/softmax.h b/mace/ops/opencl/image/softmax.h similarity index 94% rename from mace/kernels/opencl/image/softmax.h rename to 
mace/ops/opencl/image/softmax.h index cf2dd5b4..ffd5ec89 100644 --- a/mace/kernels/opencl/image/softmax.h +++ b/mace/ops/opencl/image/softmax.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_IMAGE_SOFTMAX_H_ -#define MACE_KERNELS_OPENCL_IMAGE_SOFTMAX_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_SOFTMAX_H_ +#define MACE_OPS_OPENCL_IMAGE_SOFTMAX_H_ -#include "mace/kernels/opencl/softmax.h" +#include "mace/ops/opencl/softmax.h" #include #include @@ -24,10 +24,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { namespace softmax { @@ -145,7 +145,7 @@ MaceStatus SoftmaxKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_SOFTMAX_H_ +#endif // MACE_OPS_OPENCL_IMAGE_SOFTMAX_H_ diff --git a/mace/kernels/opencl/image/space_to_batch.h b/mace/ops/opencl/image/space_to_batch.h similarity index 93% rename from mace/kernels/opencl/image/space_to_batch.h rename to mace/ops/opencl/image/space_to_batch.h index 0a20e6f6..9924f02f 100644 --- a/mace/kernels/opencl/image/space_to_batch.h +++ b/mace/ops/opencl/image/space_to_batch.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ -#define MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ +#define MACE_OPS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ -#include "mace/kernels/opencl/space_to_batch.h" +#include "mace/ops/opencl/space_to_batch.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -122,7 +122,7 @@ MaceStatus SpaceToBatchKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ +#endif // MACE_OPS_OPENCL_IMAGE_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/opencl/image/space_to_depth.h b/mace/ops/opencl/image/space_to_depth.h similarity index 94% rename from mace/kernels/opencl/image/space_to_depth.h rename to mace/ops/opencl/image/space_to_depth.h index 2e3f2a74..961d1606 100644 --- a/mace/kernels/opencl/image/space_to_depth.h +++ b/mace/ops/opencl/image/space_to_depth.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ -#define MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ +#define MACE_OPS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ -#include "mace/kernels/opencl/space_to_depth.h" +#include "mace/ops/opencl/space_to_depth.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -132,7 +132,7 @@ MaceStatus SpaceToDepthKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ +#endif // MACE_OPS_OPENCL_IMAGE_SPACE_TO_DEPTH_H_ diff --git a/mace/kernels/opencl/image/split.h b/mace/ops/opencl/image/split.h similarity index 95% rename from mace/kernels/opencl/image/split.h rename to mace/ops/opencl/image/split.h index ee7fab71..12286a6d 100644 --- a/mace/kernels/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_SPLIT_H_ -#define MACE_KERNELS_OPENCL_IMAGE_SPLIT_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_SPLIT_H_ +#define MACE_OPS_OPENCL_IMAGE_SPLIT_H_ -#include "mace/kernels/opencl/split.h" +#include "mace/ops/opencl/split.h" #include #include @@ -24,10 +24,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -146,7 +146,7 @@ MaceStatus SplitKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_SPLIT_H_ +#endif // MACE_OPS_OPENCL_IMAGE_SPLIT_H_ diff --git a/mace/kernels/opencl/image/sqrdiff_mean.h b/mace/ops/opencl/image/sqrdiff_mean.h similarity index 95% rename from mace/kernels/opencl/image/sqrdiff_mean.h rename to mace/ops/opencl/image/sqrdiff_mean.h index 3d86b05d..791566f8 100644 --- a/mace/kernels/opencl/image/sqrdiff_mean.h +++ b/mace/ops/opencl/image/sqrdiff_mean.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ -#define MACE_KERNELS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ +#define MACE_OPS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ -#include "mace/kernels/opencl/sqrdiff_mean.h" +#include "mace/ops/opencl/sqrdiff_mean.h" #include #include @@ -23,10 +23,10 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -168,7 +168,7 @@ MaceStatus SqrDiffMeanKernel::Compute( } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ +#endif // MACE_OPS_OPENCL_IMAGE_SQRDIFF_MEAN_H_ diff --git a/mace/kernels/opencl/image/winograd_transform.h b/mace/ops/opencl/image/winograd_transform.h similarity index 96% rename from mace/kernels/opencl/image/winograd_transform.h rename to mace/ops/opencl/image/winograd_transform.h index f00e5556..2d9b6c0a 100644 --- a/mace/kernels/opencl/image/winograd_transform.h +++ b/mace/ops/opencl/image/winograd_transform.h @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ +#define MACE_OPS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ -#include "mace/kernels/opencl/winograd_transform.h" +#include "mace/ops/opencl/winograd_transform.h" #include #include @@ -23,12 +23,12 @@ #include "mace/core/op_context.h" #include "mace/core/tensor.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/conv_pool_2d_util.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/activation.h" +#include "mace/ops/conv_pool_2d_util.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace opencl { namespace image { @@ -103,7 +103,7 @@ MaceStatus WinogradTransformKernel::Compute( std::vector filter_shape = {1, input_tensor->dim(3), 3, 3}; std::vector paddings(2); if (paddings_.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( + ops::CalcNHWCPaddingAndOutputSize( input_tensor->shape().data(), filter_shape.data(), dilations_.data(), strides_.data(), padding_type_, output_shape.data(), paddings.data()); } else { @@ -319,7 +319,7 @@ MaceStatus WinogradInverseTransformKernel::Compute( } } // namespace image } // namespace opencl -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_IMAGE_WINOGRAD_TRANSFORM_H_ diff --git a/mace/kernels/opencl/lstm_cell.h b/mace/ops/opencl/lstm_cell.h similarity index 86% rename from mace/kernels/opencl/lstm_cell.h rename to mace/ops/opencl/lstm_cell.h index 0ce1d26f..611201be 100644 --- a/mace/kernels/opencl/lstm_cell.h +++ b/mace/ops/opencl/lstm_cell.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_LSTM_CELL_H_ -#define MACE_KERNELS_OPENCL_LSTM_CELL_H_ +#ifndef MACE_OPS_OPENCL_LSTM_CELL_H_ +#define MACE_OPS_OPENCL_LSTM_CELL_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLLSTMCellKernel { public: virtual MaceStatus Compute( @@ -38,7 +38,7 @@ class OpenCLLSTMCellKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLLSTMCellKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_LSTM_CELL_H_ +#endif // MACE_OPS_OPENCL_LSTM_CELL_H_ diff --git a/mace/kernels/opencl/matmul.h b/mace/ops/opencl/matmul.h similarity index 86% rename from mace/kernels/opencl/matmul.h rename to mace/ops/opencl/matmul.h index e971328e..b23a0c40 100644 --- a/mace/kernels/opencl/matmul.h +++ b/mace/ops/opencl/matmul.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_MATMUL_H_ -#define MACE_KERNELS_OPENCL_MATMUL_H_ +#ifndef MACE_OPS_OPENCL_MATMUL_H_ +#define MACE_OPS_OPENCL_MATMUL_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLMatMulKernel { public: virtual MaceStatus Compute( @@ -36,7 +36,7 @@ class OpenCLMatMulKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLMatMulKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_MATMUL_H_ +#endif // MACE_OPS_OPENCL_MATMUL_H_ diff --git a/mace/kernels/opencl/out_of_range_check_test.cc b/mace/ops/opencl/out_of_range_check_test.cc similarity index 98% rename from mace/kernels/opencl/out_of_range_check_test.cc rename to mace/ops/opencl/out_of_range_check_test.cc index 957026b2..f63d1db0 100644 --- a/mace/kernels/opencl/out_of_range_check_test.cc +++ b/mace/ops/opencl/out_of_range_check_test.cc @@ -21,10 +21,10 @@ #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/kernels/opencl/helper.h" +#include "mace/ops/opencl/helper.h" namespace mace { -namespace kernels { +namespace ops { namespace { MaceStatus BufferToImageOpImpl(OpContext *context, @@ -160,5 +160,5 @@ TEST(OutOfRangeCheckTest, RandomTest) { != MaceStatus::MACE_SUCCESS); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/opencl/pad.h b/mace/ops/opencl/pad.h similarity index 86% rename from mace/kernels/opencl/pad.h rename to mace/ops/opencl/pad.h index ec91a446..9c68cee0 100644 --- a/mace/kernels/opencl/pad.h +++ b/mace/ops/opencl/pad.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_PAD_H_ -#define MACE_KERNELS_OPENCL_PAD_H_ +#ifndef MACE_OPS_OPENCL_PAD_H_ +#define MACE_OPS_OPENCL_PAD_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -22,7 +22,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLPadKernel { public: virtual MaceStatus Compute( @@ -32,7 +32,7 @@ class OpenCLPadKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLPadKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_PAD_H_ +#endif // MACE_OPS_OPENCL_PAD_H_ diff --git a/mace/kernels/opencl/pooling.h b/mace/ops/opencl/pooling.h similarity index 82% rename from mace/kernels/opencl/pooling.h rename to mace/ops/opencl/pooling.h index ce3c8b54..fc41a474 100644 --- a/mace/kernels/opencl/pooling.h +++ b/mace/ops/opencl/pooling.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_POOLING_H_ -#define MACE_KERNELS_OPENCL_POOLING_H_ +#ifndef MACE_OPS_OPENCL_POOLING_H_ +#define MACE_OPS_OPENCL_POOLING_H_ #include -#include "mace/kernels/pooling.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/pooling.h" +#include "mace/ops/conv_pool_2d_util.h" namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLPoolingKernel { public: virtual MaceStatus Compute( @@ -40,7 +40,7 @@ class OpenCLPoolingKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLPoolingKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_POOLING_H_ +#endif // MACE_OPS_OPENCL_POOLING_H_ diff --git a/mace/kernels/opencl/reduce_mean.h b/mace/ops/opencl/reduce_mean.h similarity index 84% rename from mace/kernels/opencl/reduce_mean.h rename to mace/ops/opencl/reduce_mean.h index 1960aac5..9e279a2a 100644 --- a/mace/kernels/opencl/reduce_mean.h +++ b/mace/ops/opencl/reduce_mean.h @@ -12,8 
+12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_REDUCE_MEAN_H_ -#define MACE_KERNELS_OPENCL_REDUCE_MEAN_H_ +#ifndef MACE_OPS_OPENCL_REDUCE_MEAN_H_ +#define MACE_OPS_OPENCL_REDUCE_MEAN_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLReduceMeanKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLReduceMeanKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLReduceMeanKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_REDUCE_MEAN_H_ +#endif // MACE_OPS_OPENCL_REDUCE_MEAN_H_ diff --git a/mace/kernels/opencl/resize_bicubic.h b/mace/ops/opencl/resize_bicubic.h similarity index 84% rename from mace/kernels/opencl/resize_bicubic.h rename to mace/ops/opencl/resize_bicubic.h index bfb6f8b5..2d7d3be1 100644 --- a/mace/kernels/opencl/resize_bicubic.h +++ b/mace/ops/opencl/resize_bicubic.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_RESIZE_BICUBIC_H_ -#define MACE_KERNELS_OPENCL_RESIZE_BICUBIC_H_ +#ifndef MACE_OPS_OPENCL_RESIZE_BICUBIC_H_ +#define MACE_OPS_OPENCL_RESIZE_BICUBIC_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -24,7 +24,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLResizeBicubicKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLResizeBicubicKernel { Tensor *output) = 0; MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeBicubicKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_RESIZE_BICUBIC_H_ +#endif // MACE_OPS_OPENCL_RESIZE_BICUBIC_H_ diff --git a/mace/kernels/opencl/resize_bilinear.h b/mace/ops/opencl/resize_bilinear.h similarity index 84% rename from mace/kernels/opencl/resize_bilinear.h rename to mace/ops/opencl/resize_bilinear.h index f60fb282..37d60a72 100644 --- a/mace/kernels/opencl/resize_bilinear.h +++ b/mace/ops/opencl/resize_bilinear.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_RESIZE_BILINEAR_H_ -#define MACE_KERNELS_OPENCL_RESIZE_BILINEAR_H_ +#ifndef MACE_OPS_OPENCL_RESIZE_BILINEAR_H_ +#define MACE_OPS_OPENCL_RESIZE_BILINEAR_H_ #include "mace/core/types.h" #include "mace/public/mace.h" @@ -24,7 +24,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLResizeBilinearKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLResizeBilinearKernel { Tensor *output) = 0; MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeBilinearKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_RESIZE_BILINEAR_H_ +#endif // MACE_OPS_OPENCL_RESIZE_BILINEAR_H_ diff --git a/mace/kernels/opencl/softmax.h b/mace/ops/opencl/softmax.h similarity index 85% rename from mace/kernels/opencl/softmax.h rename to mace/ops/opencl/softmax.h index 308b606e..958cae36 100644 --- a/mace/kernels/opencl/softmax.h +++ b/mace/ops/opencl/softmax.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_SOFTMAX_H_ -#define MACE_KERNELS_OPENCL_SOFTMAX_H_ +#ifndef MACE_OPS_OPENCL_SOFTMAX_H_ +#define MACE_OPS_OPENCL_SOFTMAX_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLSoftmaxKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLSoftmaxKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLSoftmaxKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_SOFTMAX_H_ +#endif // MACE_OPS_OPENCL_SOFTMAX_H_ diff --git a/mace/kernels/opencl/space_to_batch.h b/mace/ops/opencl/space_to_batch.h similarity index 86% rename from mace/kernels/opencl/space_to_batch.h rename to mace/ops/opencl/space_to_batch.h index 22d308ac..0b530ab6 100644 --- a/mace/kernels/opencl/space_to_batch.h +++ b/mace/ops/opencl/space_to_batch.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ -#define MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ +#ifndef MACE_OPS_OPENCL_SPACE_TO_BATCH_H_ +#define MACE_OPS_OPENCL_SPACE_TO_BATCH_H_ #include @@ -26,7 +26,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLSpaceToBatchKernel { public: virtual MaceStatus Compute( @@ -39,7 +39,7 @@ class OpenCLSpaceToBatchKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLSpaceToBatchKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ +#endif // MACE_OPS_OPENCL_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/opencl/space_to_depth.h b/mace/ops/opencl/space_to_depth.h similarity index 83% rename from mace/kernels/opencl/space_to_depth.h rename to mace/ops/opencl/space_to_depth.h index ea6b16c1..44d04975 100644 --- a/mace/kernels/opencl/space_to_depth.h +++ b/mace/ops/opencl/space_to_depth.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_SPACE_TO_DEPTH_H_ -#define MACE_KERNELS_OPENCL_SPACE_TO_DEPTH_H_ +#ifndef MACE_OPS_OPENCL_SPACE_TO_DEPTH_H_ +#define MACE_OPS_OPENCL_SPACE_TO_DEPTH_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -23,7 +23,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLSpaceToDepthKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLSpaceToDepthKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLSpaceToDepthKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_SPACE_TO_DEPTH_H_ +#endif // MACE_OPS_OPENCL_SPACE_TO_DEPTH_H_ diff --git a/mace/kernels/opencl/split.h b/mace/ops/opencl/split.h similarity index 86% rename from mace/kernels/opencl/split.h rename to mace/ops/opencl/split.h index c5cacd6f..3daae7d7 100644 --- a/mace/kernels/opencl/split.h +++ b/mace/ops/opencl/split.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPENCL_SPLIT_H_ -#define MACE_KERNELS_OPENCL_SPLIT_H_ +#ifndef MACE_OPS_OPENCL_SPLIT_H_ +#define MACE_OPS_OPENCL_SPLIT_H_ #include @@ -25,7 +25,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLSplitKernel { public: virtual MaceStatus Compute( @@ -35,7 +35,7 @@ class OpenCLSplitKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLSplitKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_SPLIT_H_ +#endif // MACE_OPS_OPENCL_SPLIT_H_ diff --git a/mace/kernels/opencl/sqrdiff_mean.h b/mace/ops/opencl/sqrdiff_mean.h similarity index 84% rename from mace/kernels/opencl/sqrdiff_mean.h rename to mace/ops/opencl/sqrdiff_mean.h index c2d5d197..2814aa6d 100644 --- a/mace/kernels/opencl/sqrdiff_mean.h +++ b/mace/ops/opencl/sqrdiff_mean.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_SQRDIFF_MEAN_H_ -#define MACE_KERNELS_OPENCL_SQRDIFF_MEAN_H_ +#ifndef MACE_OPS_OPENCL_SQRDIFF_MEAN_H_ +#define MACE_OPS_OPENCL_SQRDIFF_MEAN_H_ #include "mace/public/mace.h" #include "mace/utils/utils.h" @@ -22,7 +22,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLSqrDiffMeanKernel { public: virtual MaceStatus Compute( @@ -33,7 +33,7 @@ class OpenCLSqrDiffMeanKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLSqrDiffMeanKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_SQRDIFF_MEAN_H_ +#endif // MACE_OPS_OPENCL_SQRDIFF_MEAN_H_ diff --git a/mace/kernels/opencl/winograd_transform.h b/mace/ops/opencl/winograd_transform.h similarity index 86% rename from mace/kernels/opencl/winograd_transform.h rename to mace/ops/opencl/winograd_transform.h index d706e89b..f150481a 100644 --- a/mace/kernels/opencl/winograd_transform.h +++ b/mace/ops/opencl/winograd_transform.h @@ -12,8 +12,8 @@ 
// See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_OPENCL_WINOGRAD_TRANSFORM_H_ -#define MACE_KERNELS_OPENCL_WINOGRAD_TRANSFORM_H_ +#ifndef MACE_OPS_OPENCL_WINOGRAD_TRANSFORM_H_ +#define MACE_OPS_OPENCL_WINOGRAD_TRANSFORM_H_ #include @@ -24,7 +24,7 @@ namespace mace { class OpContext; class Tensor; -namespace kernels { +namespace ops { class OpenCLWinogradTransformKernel { public: @@ -44,7 +44,7 @@ class OpenCLWinogradInverseTransformKernel { MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLWinogradInverseTransformKernel); }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_OPENCL_WINOGRAD_TRANSFORM_H_ +#endif // MACE_OPS_OPENCL_WINOGRAD_TRANSFORM_H_ diff --git a/mace/ops/ops_def_register.cc b/mace/ops/ops_def_register.cc deleted file mode 100644 index 46ee5184..00000000 --- a/mace/ops/ops_def_register.cc +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "mace/ops/ops_def_register.h" - -#include - -namespace mace { -namespace ops { - -void RegisterOpDefs(OpDefRegistryBase *op_def_registry) { - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Activation") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("AddN") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ArgMax") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("BatchNorm") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("BatchToSpaceND") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("BiasAdd") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("BufferInverseTransform") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("BufferTransform") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Cast") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ChannelShuffle") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Concat") - .SetDevicePlaceFunc([]() -> std::vector { 
- return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Conv2D") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Crop") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Deconv2D") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("DepthToSpace") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("DepthwiseConv2d") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Dequantize") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Eltwise") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ExpandDims") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Fill") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("FullyConnected") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Gather") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - 
OpRegistrationBuilder("Identity") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("InferConv2dShape") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("LocalResponseNorm") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("LSTMCell") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("MatMul") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Pad") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Pooling") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Quantize") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ReduceMean") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Reshape") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ResizeBicubic") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ResizeBilinear") - .SetDevicePlaceFunc([]() -> std::vector { - 
return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Reverse") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("ScalarMath") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Shape") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Softmax") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("SpaceToBatchND") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("SpaceToDepth") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Split") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("SqrDiffMean") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Squeeze") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Stack") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("StridedSlice") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - 
MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Transpose") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("Unstack") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::CPU, DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("WinogradInverseTransform") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::GPU}; - })); - - MACE_REGISTER_OP_DEF( - op_def_registry, - OpRegistrationBuilder("WinogradTransform") - .SetDevicePlaceFunc([]() -> std::vector { - return {DeviceType::GPU}; - })); -} -} // namespace ops - - -OpDefRegistry::OpDefRegistry() : OpDefRegistryBase() { - ops::RegisterOpDefs(this); -} - -} // namespace mace diff --git a/mace/ops/ops_def_register.h b/mace/ops/ops_def_register.h deleted file mode 100644 index 5b2d6acb..00000000 --- a/mace/ops/ops_def_register.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_OPS_DEF_REGISTER_H_ -#define MACE_OPS_OPS_DEF_REGISTER_H_ - -#include "mace/core/op_def_registry.h" - -namespace mace { - -class OpDefRegistry : public OpDefRegistryBase { - public: - OpDefRegistry(); - ~OpDefRegistry() override = default; -}; - -} // namespace mace - -#endif // MACE_OPS_OPS_DEF_REGISTER_H_ diff --git a/mace/kernels/ops_register.cc b/mace/ops/ops_registry.cc similarity index 67% rename from mace/kernels/ops_register.cc rename to mace/ops/ops_registry.cc index 4dba8910..e330d66a 100644 --- a/mace/kernels/ops_register.cc +++ b/mace/ops/ops_registry.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/ops_register.h" +#include "mace/ops/ops_registry.h" namespace mace { -namespace kernels { +namespace ops { // Keep in lexicographical order extern void RegisterActivation(OpRegistryBase *op_registry); extern void RegisterAddN(OpRegistryBase *op_registry); @@ -62,70 +62,70 @@ extern void RegisterStack(OpRegistryBase *op_registry); extern void RegisterStridedSlice(OpRegistryBase *op_registry); extern void RegisterTranspose(OpRegistryBase *op_registry); extern void RegisterUnstack(OpRegistryBase *op_registry); + #ifdef MACE_ENABLE_OPENCL extern void RegisterBufferTransform(OpRegistryBase *op_registry); extern void RegisterBufferInverseTransform(OpRegistryBase *op_registry); extern void RegisterLSTMCell(OpRegistryBase *op_registry); extern void RegisterWinogradInverseTransform(OpRegistryBase *op_registry); extern void RegisterWinogradTransform(OpRegistryBase *op_registry); - #endif // MACE_ENABLE_OPENCL -} // namespace kernels +} // namespace ops OpRegistry::OpRegistry() : OpRegistryBase() { // Keep in lexicographical order - kernels::RegisterActivation(this); - kernels::RegisterAddN(this); - kernels::RegisterArgMax(this); - kernels::RegisterBatchNorm(this); - kernels::RegisterBatchToSpaceND(this); - kernels::RegisterBiasAdd(this); - 
kernels::RegisterCast(this); - kernels::RegisterChannelShuffle(this); - kernels::RegisterConcat(this); - kernels::RegisterConv2D(this); - kernels::RegisterCrop(this); - kernels::RegisterDeconv2D(this); - kernels::RegisterDepthToSpace(this); - kernels::RegisterDepthwiseConv2d(this); - kernels::RegisterDequantize(this); - kernels::RegisterEltwise(this); - kernels::RegisterExpandDims(this); - kernels::RegisterFill(this); - kernels::RegisterFullyConnected(this); - kernels::RegisterGather(this); - kernels::RegisterIdentity(this); - kernels::RegisterInferConv2dShape(this); - kernels::RegisterLocalResponseNorm(this); - kernels::RegisterMatMul(this); - kernels::RegisterPad(this); - kernels::RegisterPooling(this); - kernels::RegisterQuantize(this); - kernels::RegisterReduceMean(this); - kernels::RegisterReshape(this); - kernels::RegisterResizeBicubic(this); - kernels::RegisterResizeBilinear(this); - kernels::RegisterReverse(this); - kernels::RegisterScalarMath(this); - kernels::RegisterShape(this); - kernels::RegisterSoftmax(this); - kernels::RegisterSpaceToBatchND(this); - kernels::RegisterSpaceToDepth(this); - kernels::RegisterSplit(this); - kernels::RegisterStack(this); - kernels::RegisterStridedSlice(this); - kernels::RegisterSqrDiffMean(this); - kernels::RegisterSqueeze(this); - kernels::RegisterTranspose(this); - kernels::RegisterUnstack(this); -#ifdef MACE_ENABLE_OPENCL - kernels::RegisterBufferTransform(this); - kernels::RegisterBufferInverseTransform(this); - kernels::RegisterLSTMCell(this); - kernels::RegisterWinogradInverseTransform(this); - kernels::RegisterWinogradTransform(this); + ops::RegisterActivation(this); + ops::RegisterAddN(this); + ops::RegisterArgMax(this); + ops::RegisterBatchNorm(this); + ops::RegisterBatchToSpaceND(this); + ops::RegisterBiasAdd(this); + ops::RegisterCast(this); + ops::RegisterChannelShuffle(this); + ops::RegisterConcat(this); + ops::RegisterConv2D(this); + ops::RegisterCrop(this); + ops::RegisterDeconv2D(this); + 
ops::RegisterDepthToSpace(this); + ops::RegisterDepthwiseConv2d(this); + ops::RegisterDequantize(this); + ops::RegisterEltwise(this); + ops::RegisterExpandDims(this); + ops::RegisterFill(this); + ops::RegisterFullyConnected(this); + ops::RegisterGather(this); + ops::RegisterIdentity(this); + ops::RegisterInferConv2dShape(this); + ops::RegisterLocalResponseNorm(this); + ops::RegisterMatMul(this); + ops::RegisterPad(this); + ops::RegisterPooling(this); + ops::RegisterQuantize(this); + ops::RegisterReduceMean(this); + ops::RegisterReshape(this); + ops::RegisterResizeBicubic(this); + ops::RegisterResizeBilinear(this); + ops::RegisterReverse(this); + ops::RegisterScalarMath(this); + ops::RegisterShape(this); + ops::RegisterSoftmax(this); + ops::RegisterSpaceToBatchND(this); + ops::RegisterSpaceToDepth(this); + ops::RegisterSplit(this); + ops::RegisterStack(this); + ops::RegisterStridedSlice(this); + ops::RegisterSqrDiffMean(this); + ops::RegisterSqueeze(this); + ops::RegisterTranspose(this); + ops::RegisterUnstack(this); +#ifdef MACE_ENABLE_OPENCL + ops::RegisterBufferTransform(this); + ops::RegisterBufferInverseTransform(this); + ops::RegisterLSTMCell(this); + ops::RegisterWinogradInverseTransform(this); + ops::RegisterWinogradTransform(this); #endif // MACE_ENABLE_OPENCL } diff --git a/mace/kernels/ops_register.h b/mace/ops/ops_registry.h similarity index 87% rename from mace/kernels/ops_register.h rename to mace/ops/ops_registry.h index e3576adb..beb17c37 100644 --- a/mace/kernels/ops_register.h +++ b/mace/ops/ops_registry.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_OPS_REGISTER_H_ -#define MACE_KERNELS_OPS_REGISTER_H_ +#ifndef MACE_OPS_OPS_REGISTRY_H_ +#define MACE_OPS_OPS_REGISTRY_H_ #include "mace/core/operator.h" @@ -27,4 +27,4 @@ class OpRegistry : public OpRegistryBase { } // namespace mace -#endif // MACE_KERNELS_OPS_REGISTER_H_ +#endif // MACE_OPS_OPS_REGISTRY_H_ diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 4823bd80..5bf842f3 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -31,9 +31,8 @@ #include "mace/core/runtime/opencl/gpu_device.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/kernels/opencl/common.h" -#include "mace/kernels/ops_register.h" -#include "mace/ops/ops_def_register.h" +#include "mace/ops/opencl/common.h" +#include "mace/ops/ops_registry.h" #include "mace/public/mace.h" #include "mace/utils/utils.h" #include "mace/utils/quantize.h" @@ -141,7 +140,6 @@ class OpTestContext { class OpsTestNet { public: OpsTestNet() : - op_def_registry_(new OpDefRegistry()), op_registry_(new OpRegistry()) {} template @@ -455,10 +453,8 @@ class OpsTestNet { NetDef net_def; for (auto &op_def_ : op_defs_) { net_def.add_op()->CopyFrom(op_def_); - net_def.add_op_types(op_def_.type()); } net_ = std::unique_ptr(new SerialNet( - op_def_registry_.get(), op_registry_.get(), &net_def, &ws_, @@ -502,7 +498,6 @@ class OpsTestNet { MaceStatus RunNet(const NetDef &net_def, const DeviceType device) { device_type_ = device; auto net = std::unique_ptr(new SerialNet( - op_def_registry_.get(), op_registry_.get(), &net_def, &ws_, @@ -511,7 +506,6 @@ class OpsTestNet { MACE_RETURN_IF_ERROR(net->Init()); MACE_RETURN_IF_ERROR(net->Run()); net_ = std::unique_ptr(new SerialNet( - op_def_registry_.get(), op_registry_.get(), &net_def, &ws_, @@ -538,7 +532,6 @@ class OpsTestNet { } public: - std::shared_ptr op_def_registry_; std::shared_ptr op_registry_; Workspace ws_; std::vector op_defs_; @@ -784,7 +777,7 @@ template void BufferToImage(OpsTestNet 
*net, const std::string &input_name, const std::string &output_name, - const kernels::BufferType type, + const ops::BufferType type, const int wino_block_size = 2) { MACE_CHECK_NOTNULL(net); @@ -806,7 +799,7 @@ template void ImageToBuffer(OpsTestNet *net, const std::string &input_name, const std::string &output_name, - const kernels::BufferType type, + const ops::BufferType type, const int wino_block_size = 2) { MACE_CHECK_NOTNULL(net); diff --git a/mace/kernels/pad.cc b/mace/ops/pad.cc similarity index 98% rename from mace/kernels/pad.cc rename to mace/ops/pad.cc index 9024eb0f..60bfbc19 100644 --- a/mace/kernels/pad.cc +++ b/mace/ops/pad.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/pad.h" +#include "mace/ops/opencl/image/pad.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class PadOp; @@ -126,5 +126,5 @@ void RegisterPad(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/pad_benchmark.cc b/mace/ops/pad_benchmark.cc index ad8a1254..e295d2ad 100644 --- a/mace/ops/pad_benchmark.cc +++ b/mace/ops/pad_benchmark.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -35,7 +33,7 @@ void Pad(int iters, int batch, int height, const std::vector paddings = {0, 0, pad, pad, pad, pad, 0, 0}; if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pad", "PadTest") .Input("InputImage") .Output("OutputImage") diff --git a/mace/ops/pad_test.cc b/mace/ops/pad_test.cc index a8c2267f..f0eece25 100644 --- a/mace/ops/pad_test.cc +++ b/mace/ops/pad_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -31,7 +30,7 @@ void Simple() { net.AddRepeatedInput("Input", {1, 2, 3, 1}, 2); if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pad", "PadTest") .Input("InputImage") .Output("OutputImage") @@ -43,7 +42,7 @@ void Simple() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else { net.TransformDataFormat("Input", NHWC, "TInput", NCHW); @@ -138,7 +137,7 @@ void Complex(const std::vector &input_shape, expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pad", "PadTest") .Input("InputImage") .Output("OutputImage") @@ -150,7 +149,7 @@ void Complex(const std::vector &input_shape, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OpenCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto output = net.GetTensor("OpenCLOutput"); diff --git 
a/mace/kernels/pooling.cc b/mace/ops/pooling.cc similarity index 98% rename from mace/kernels/pooling.cc rename to mace/ops/pooling.cc index 07d41d11..e1800b87 100644 --- a/mace/kernels/pooling.cc +++ b/mace/ops/pooling.cc @@ -16,7 +16,7 @@ #include #endif -#include "mace/kernels/pooling.h" +#include "mace/ops/pooling.h" #include #include @@ -26,15 +26,15 @@ #include "mace/core/future.h" #include "mace/core/operator.h" #include "mace/core/tensor.h" -#include "mace/kernels/conv_pool_2d_base.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_base.h" +#include "mace/ops/conv_pool_2d_util.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/pooling.h" -#include "mace/kernels/opencl/buffer/pooling.h" +#include "mace/ops/opencl/image/pooling.h" +#include "mace/ops/opencl/buffer/pooling.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class PoolingOpBase : public ConvPool2dOpBase { public: @@ -72,7 +72,7 @@ class PoolingOp : public PoolingOpBase { std::vector paddings(2); if (paddings_.empty()) { - kernels::CalcNCHWPaddingAndOutputSize( + ops::CalcNCHWPaddingAndOutputSize( input_tensor->shape().data(), filter_shape.data(), dilations_.data(), strides_.data(), padding_type_, output_shape.data(), paddings.data()); } else { @@ -463,5 +463,5 @@ void RegisterPooling(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/pooling.h b/mace/ops/pooling.h similarity index 87% rename from mace/kernels/pooling.h rename to mace/ops/pooling.h index 9780907c..b974e826 100644 --- a/mace/kernels/pooling.h +++ b/mace/ops/pooling.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_POOLING_H_ -#define MACE_KERNELS_POOLING_H_ +#ifndef MACE_OPS_POOLING_H_ +#define MACE_OPS_POOLING_H_ namespace mace { @@ -23,4 +23,4 @@ enum PoolingType { }; } // namespace mace -#endif // MACE_KERNELS_POOLING_H_ +#endif // MACE_OPS_POOLING_H_ diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index e5199001..ff915ec0 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/conv_pool_2d_util.h" -#include "mace/kernels/pooling.h" +#include "mace/ops/conv_pool_2d_util.h" +#include "mace/ops/pooling.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -66,7 +65,7 @@ void Pooling(int iters, .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 4cd432d5..99691db8 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "gtest/gtest.h" +#include -#include "mace/core/op_def_registry.h" -#include "mace/kernels/pooling.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/pooling.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -192,7 +191,7 @@ void SimpleMaxPooling3S2() { "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -204,7 +203,7 @@ void SimpleMaxPooling3S2() { .Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } // Check @@ -252,7 +251,7 @@ void MaxPooling3S2(const std::vector &input_shape, expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -265,7 +264,7 @@ void MaxPooling3S2(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-3, @@ -351,7 +350,7 @@ void SimpleAvgPoolingTest() { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -364,7 +363,7 @@ void SimpleAvgPoolingTest() { // Run net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // Check auto 
expected = net.CreateTensor({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5}); @@ -410,7 +409,7 @@ void AvgPoolingTest(const std::vector &shape, expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -423,7 +422,7 @@ void AvgPoolingTest(const std::vector &shape, .Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-3, diff --git a/mace/kernels/quantize.cc b/mace/ops/quantize.cc similarity index 98% rename from mace/kernels/quantize.cc rename to mace/ops/quantize.cc index 2fd9e7c3..3b2ea301 100644 --- a/mace/kernels/quantize.cc +++ b/mace/ops/quantize.cc @@ -22,7 +22,7 @@ #include "mace/utils/quantize.h" namespace mace { -namespace kernels { +namespace ops { template class QuantizeOp; @@ -107,5 +107,5 @@ void RegisterDequantize(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp, DeviceType::CPU, int32_t); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/quantize_test.cc b/mace/ops/quantize_test.cc index 207ab4e4..e889f852 100644 --- a/mace/ops/quantize_test.cc +++ b/mace/ops/quantize_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/reduce_mean.cc b/mace/ops/reduce_mean.cc similarity index 98% rename from mace/kernels/reduce_mean.cc rename to mace/ops/reduce_mean.cc index d103125b..0857eb3e 100644 --- a/mace/kernels/reduce_mean.cc +++ b/mace/ops/reduce_mean.cc @@ -20,11 +20,11 @@ #include "mace/core/operator.h" #include "mace/core/tensor.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/reduce_mean.h" +#include "mace/ops/opencl/image/reduce_mean.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class ReduceMeanOpBase : public Operation { public: @@ -271,5 +271,5 @@ void RegisterReduceMean(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/reduce_mean_benchmark.cc b/mace/ops/reduce_mean_benchmark.cc index 02f6d447..24338ce7 100644 --- a/mace/ops/reduce_mean_benchmark.cc +++ b/mace/ops/reduce_mean_benchmark.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -33,7 +31,7 @@ void ReduceMean(int iters, int batch, int channels, if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ReduceMean", "ReduceMeanBM") .Input("InputImage") .AddIntsArg("axis", {1, 2}) diff --git a/mace/ops/reduce_mean_test.cc b/mace/ops/reduce_mean_test.cc index 24ff7a4a..bc2577e2 100644 --- a/mace/ops/reduce_mean_test.cc +++ b/mace/ops/reduce_mean_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -45,7 +44,7 @@ void Simple(const std::vector &input_shape, net.RunOp(D); } else { BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("InputImg") .AddIntsArg("axis", axis) @@ -55,7 +54,7 @@ void Simple(const std::vector &input_shape, // Run net.RunOp(D); ImageToBuffer(&net, "OutputImg", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto expected = net.CreateTensor(output_shape, output); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5, 1e-3); @@ -362,7 +361,7 @@ void RandomTest(const std::vector &input_shape, net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("InputImg") .AddIntsArg("axis", axis) @@ -372,7 +371,7 @@ void RandomTest(const std::vector &input_shape, // Run net.RunOp(D); ImageToBuffer(&net, "OutputImg", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*net.GetTensor("Output"), *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); diff --git a/mace/kernels/reshape.cc b/mace/ops/reshape.cc similarity index 98% rename from mace/kernels/reshape.cc rename to mace/ops/reshape.cc index 2cfef42b..400d1cff 100644 --- a/mace/kernels/reshape.cc +++ b/mace/ops/reshape.cc @@ -17,7 +17,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class ReshapeOp : public Operation { @@ -84,5 +84,5 @@ void RegisterReshape(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/reshape_test.cc 
b/mace/ops/reshape_test.cc index bdc7ab97..686975fe 100644 --- a/mace/ops/reshape_test.cc +++ b/mace/ops/reshape_test.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "gmock/gmock.h" -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/resize_bicubic.cc b/mace/ops/resize_bicubic.cc similarity index 98% rename from mace/kernels/resize_bicubic.cc rename to mace/ops/resize_bicubic.cc index fe0512ff..28912fae 100644 --- a/mace/kernels/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/resize_bicubic.h" +#include "mace/ops/resize_bicubic.h" #include #include @@ -20,11 +20,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/resize_bicubic.h" +#include "mace/ops/opencl/image/resize_bicubic.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { inline const std::shared_ptr InitCoeffsTable() { // Allocate and initialize coefficients table using Bicubic @@ -230,5 +230,5 @@ void RegisterResizeBicubic(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/resize_bicubic.h b/mace/ops/resize_bicubic.h similarity index 87% rename from mace/kernels/resize_bicubic.h rename to mace/ops/resize_bicubic.h index 5e02edd4..b53f112b 100644 --- a/mace/kernels/resize_bicubic.h +++ b/mace/ops/resize_bicubic.h @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_RESIZE_BICUBIC_H_ -#define MACE_KERNELS_RESIZE_BICUBIC_H_ +#ifndef MACE_OPS_RESIZE_BICUBIC_H_ +#define MACE_OPS_RESIZE_BICUBIC_H_ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { namespace resize_bicubic { constexpr int64_t kTableSize = (1u << 10); @@ -31,7 +31,7 @@ inline float CalculateResizeScale(index_t in_size, } } // namespace resize_bicubic -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_RESIZE_BICUBIC_H_ +#endif // MACE_OPS_RESIZE_BICUBIC_H_ diff --git a/mace/ops/resize_bicubic_benchmark.cc b/mace/ops/resize_bicubic_benchmark.cc index f0847e4c..896fb1e0 100644 --- a/mace/ops/resize_bicubic_benchmark.cc +++ b/mace/ops/resize_bicubic_benchmark.cc @@ -13,7 +13,6 @@ // limitations under the License. #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -57,7 +56,7 @@ void ResizeBicubicBenchmark(int iters, .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ResizeBicubic", "ResizeBicubicBenchmark") .Input("InputImage") .Input("OutSize") diff --git a/mace/ops/resize_bicubic_test.cc b/mace/ops/resize_bicubic_test.cc index 8dc1dbf7..3a33eefc 100644 --- a/mace/ops/resize_bicubic_test.cc +++ b/mace/ops/resize_bicubic_test.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -153,7 +152,7 @@ void TestRandomResizeBicubic() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ResizeBicubic", "ResizeBicubicTest") .Input("InputImage") @@ -165,7 +164,7 @@ void TestRandomResizeBicubic() { net.RunOp(D); ImageToBuffer(&net, "OutputImage", "DeviceOutput", - 
kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } // Check ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), 1e-2, diff --git a/mace/kernels/resize_bilinear.cc b/mace/ops/resize_bilinear.cc similarity index 98% rename from mace/kernels/resize_bilinear.cc rename to mace/ops/resize_bilinear.cc index 8ea86158..4b3f5a09 100644 --- a/mace/kernels/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/resize_bilinear.h" +#include "mace/ops/resize_bilinear.h" #include #include @@ -21,11 +21,11 @@ #include "mace/core/operator.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/resize_bilinear.h" +#include "mace/ops/opencl/image/resize_bilinear.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { struct CachedInterpolation { index_t lower; // Lower source index used in the interpolation @@ -367,5 +367,5 @@ void RegisterResizeBilinear(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/resize_bilinear.h b/mace/ops/resize_bilinear.h similarity index 86% rename from mace/kernels/resize_bilinear.h rename to mace/ops/resize_bilinear.h index 1f94e500..cf0d32b4 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/ops/resize_bilinear.h @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_KERNELS_RESIZE_BILINEAR_H_ -#define MACE_KERNELS_RESIZE_BILINEAR_H_ +#ifndef MACE_OPS_RESIZE_BILINEAR_H_ +#define MACE_OPS_RESIZE_BILINEAR_H_ #include "mace/core/types.h" namespace mace { -namespace kernels { +namespace ops { namespace resize_bilinear { inline float CalculateResizeScale(index_t in_size, index_t out_size, @@ -28,7 +28,7 @@ inline float CalculateResizeScale(index_t in_size, : in_size / static_cast(out_size); } } // namespace resize_bilinear -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_RESIZE_BILINEAR_H_ +#endif // MACE_OPS_RESIZE_BILINEAR_H_ diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc index 2fd6b6c2..345f86bb 100644 --- a/mace/ops/resize_bilinear_benchmark.cc +++ b/mace/ops/resize_bilinear_benchmark.cc @@ -13,7 +13,6 @@ // limitations under the License. #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -64,7 +63,7 @@ void ResizeBilinearBenchmark(int iters, .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") .Input("InputImage") .Input("OutSize") diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index 3ff5372a..b611854f 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -120,7 +119,7 @@ void TestRandomResizeBilinear() { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") .Input("InputImage") @@ -132,7 +131,7 @@ void TestRandomResizeBilinear() { 
net.RunOp(D); ImageToBuffer(&net, "OutputImage", "DeviceOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } // Check ExpectTensorNear(*expected, *net.GetOutput("DeviceOutput"), 1e-5, diff --git a/mace/kernels/reverse.cc b/mace/ops/reverse.cc similarity index 98% rename from mace/kernels/reverse.cc rename to mace/ops/reverse.cc index f73db418..64d4d44d 100644 --- a/mace/kernels/reverse.cc +++ b/mace/ops/reverse.cc @@ -15,7 +15,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class ReverseOp; @@ -77,5 +77,5 @@ void RegisterReverse(OpRegistryBase *op_registry) { DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/reverse_benchmark.cc b/mace/ops/reverse_benchmark.cc index 40f2f908..9630f696 100644 --- a/mace/ops/reverse_benchmark.cc +++ b/mace/ops/reverse_benchmark.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/mace/ops/reverse_test.cc b/mace/ops/reverse_test.cc index 282214fd..9b827461 100644 --- a/mace/ops/reverse_test.cc +++ b/mace/ops/reverse_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/scalar_math.cc b/mace/ops/scalar_math.cc similarity index 95% rename from mace/kernels/scalar_math.cc rename to mace/ops/scalar_math.cc index f9f4822a..4a866ae7 100644 --- a/mace/kernels/scalar_math.cc +++ b/mace/ops/scalar_math.cc @@ -16,10 +16,10 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" namespace mace { -namespace kernels { +namespace ops { template void ScalarEltwise(const T* in0, @@ -88,8 +88,8 @@ class ScalarMathOp : public Operation { public: explicit ScalarMathOp(OpConstructContext *context) : Operation(context), - type_(static_cast(Operation::GetOptionalArg( - "type", static_cast(kernels::EltwiseType::NONE)))), + type_(static_cast(Operation::GetOptionalArg( + "type", static_cast(ops::EltwiseType::NONE)))), coeff_(Operation::GetRepeatedArgs("coeff")), scalar_input_(Operation::GetOptionalArg("scalar_input", 1.0)), scalar_input_index_(Operation::GetOptionalArg( @@ -160,5 +160,5 @@ void RegisterScalarMath(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/scalar_math_test.cc b/mace/ops/scalar_math_test.cc index 99caa07d..b9d8fd0b 100644 --- a/mace/ops/scalar_math_test.cc +++ b/mace/ops/scalar_math_test.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" -#include "mace/kernels/eltwise.h" +#include "mace/ops/eltwise.h" namespace mace { namespace ops { @@ -24,7 +23,7 @@ class ScalarMathOpTest : public OpsTestBase {}; namespace { template -void ScalarMathTest(const kernels::EltwiseType type, +void ScalarMathTest(const ops::EltwiseType type, const T input0, const T input1, const float x, @@ -42,7 +41,7 @@ void ScalarMathTest(const kernels::EltwiseType type, .AddIntArg("T", DataTypeToEnum::v()) .AddIntArg("type", static_cast(type)) .AddFloatArg("scalar_input", x) - .OutputType({kernels::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) + .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT}) .Output("Output") .Finalize(net.NewOperatorDef()); // Run @@ -57,52 +56,52 @@ void ScalarMathTest(const kernels::EltwiseType type, TEST_F(ScalarMathOpTest, SimpleCPU) { ScalarMathTest( - kernels::EltwiseType::SUM, 1, 2, 3, 3); + ops::EltwiseType::SUM, 1, 2, 3, 3); ScalarMathTest( - kernels::EltwiseType::SUB, 1, 2, 3, -1); + ops::EltwiseType::SUB, 1, 2, 3, -1); ScalarMathTest( - kernels::EltwiseType::PROD, 3, -2, 3, -6); + ops::EltwiseType::PROD, 3, -2, 3, -6); ScalarMathTest( - kernels::EltwiseType::DIV, 3, -2, 1, -1.5); + ops::EltwiseType::DIV, 3, -2, 1, -1.5); ScalarMathTest( - kernels::EltwiseType::MIN, 3, -2, 1, -2); + ops::EltwiseType::MIN, 3, -2, 1, -2); ScalarMathTest( - kernels::EltwiseType::MAX, 3, -2, 1, 3); + ops::EltwiseType::MAX, 3, -2, 1, 3); ScalarMathTest( - kernels::EltwiseType::NEG, 3, -2, 1, -3); + ops::EltwiseType::NEG, 3, -2, 1, -3); ScalarMathTest( - kernels::EltwiseType::ABS, 3, -2, 1, 3); + ops::EltwiseType::ABS, 3, -2, 1, 3); ScalarMathTest( - kernels::EltwiseType::SQR_DIFF, 3, -2, 1, 25); + ops::EltwiseType::SQR_DIFF, 3, -2, 1, 25); ScalarMathTest( - kernels::EltwiseType::POW, 3, 1, 1, 3); + ops::EltwiseType::POW, 3, 1, 1, 3); ScalarMathTest( - kernels::EltwiseType::EQUAL, 3, 3, 1, 1); + ops::EltwiseType::EQUAL, 3, 3, 1, 1); } 
TEST_F(ScalarMathOpTest, SimpleGPU) { ScalarMathTest( - kernels::EltwiseType::SUM, 1, 2, 1, 3); + ops::EltwiseType::SUM, 1, 2, 1, 3); ScalarMathTest( - kernels::EltwiseType::SUB, 1, 2, 1, -1); + ops::EltwiseType::SUB, 1, 2, 1, -1); ScalarMathTest( - kernels::EltwiseType::PROD, 3, -2, 1, -6); + ops::EltwiseType::PROD, 3, -2, 1, -6); ScalarMathTest( - kernels::EltwiseType::DIV, 3, -2, 1, -1.5); + ops::EltwiseType::DIV, 3, -2, 1, -1.5); ScalarMathTest( - kernels::EltwiseType::MIN, 3, -2, 1, -2); + ops::EltwiseType::MIN, 3, -2, 1, -2); ScalarMathTest( - kernels::EltwiseType::MAX, 3, -2, 1, 3); + ops::EltwiseType::MAX, 3, -2, 1, 3); ScalarMathTest( - kernels::EltwiseType::NEG, 3, -2, 1, -3); + ops::EltwiseType::NEG, 3, -2, 1, -3); ScalarMathTest( - kernels::EltwiseType::ABS, 3, -2, 1, 3); + ops::EltwiseType::ABS, 3, -2, 1, 3); ScalarMathTest( - kernels::EltwiseType::SQR_DIFF, 3, -2, 1, 25); + ops::EltwiseType::SQR_DIFF, 3, -2, 1, 25); ScalarMathTest( - kernels::EltwiseType::POW, 3, 1, 1, 3); + ops::EltwiseType::POW, 3, 1, 1, 3); ScalarMathTest( - kernels::EltwiseType::EQUAL, 3, 3, 1, 1); + ops::EltwiseType::EQUAL, 3, 3, 1, 1); } } // namespace test } // namespace ops diff --git a/mace/kernels/sgemm.cc b/mace/ops/sgemm.cc similarity index 99% rename from mace/kernels/sgemm.cc rename to mace/ops/sgemm.cc index 6b20256d..2edb6fe3 100644 --- a/mace/kernels/sgemm.cc +++ b/mace/ops/sgemm.cc @@ -14,7 +14,7 @@ #include -#include "mace/kernels/sgemm.h" +#include "mace/ops/sgemm.h" #include "mace/core/runtime/cpu/cpu_runtime.h" @@ -27,7 +27,7 @@ #endif namespace mace { -namespace kernels { +namespace ops { void SGemm::operator()(const MatrixMap &lhs, const MatrixMap &rhs, @@ -123,14 +123,14 @@ void SGemm::Run(const float *A, MatrixMap(batch, height_a, width_a, - kernels::RowMajor, + ops::RowMajor, A, is_a_weight); MatrixMap matrix_b = - kernels::MatrixMap(batch, + ops::MatrixMap(batch, height_b, width_b, - kernels::RowMajor, + ops::RowMajor, B, is_b_weight); if (transpose_a) { @@ 
-139,7 +139,7 @@ void SGemm::Run(const float *A, if (transpose_b) { matrix_b = matrix_b.transpose(); } - MatrixMap matrix_c(batch, height_c, width_c, kernels::RowMajor, C); + MatrixMap matrix_c(batch, height_c, width_c, ops::RowMajor, C); operator()(matrix_a, matrix_b, &matrix_c, scratch_buffer); } @@ -1167,5 +1167,5 @@ void SGemm::UnPackPerBatch(const float *packed_data, } } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/sgemm.h b/mace/ops/sgemm.h similarity index 96% rename from mace/kernels/sgemm.h rename to mace/ops/sgemm.h index 92a185db..f233820f 100644 --- a/mace/kernels/sgemm.h +++ b/mace/ops/sgemm.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_KERNELS_SGEMM_H_ -#define MACE_KERNELS_SGEMM_H_ +#ifndef MACE_OPS_SGEMM_H_ +#define MACE_OPS_SGEMM_H_ #include #include @@ -27,7 +27,7 @@ #include "mace/core/tensor.h" namespace mace { -namespace kernels { +namespace ops { enum Major { RowMajor, @@ -178,7 +178,7 @@ class SGemm { bool packed_; }; -} // namespace kernels +} // namespace ops } // namespace mace -#endif // MACE_KERNELS_SGEMM_H_ +#endif // MACE_OPS_SGEMM_H_ diff --git a/mace/kernels/sgemm_pack_test.cc b/mace/ops/sgemm_pack_test.cc similarity index 98% rename from mace/kernels/sgemm_pack_test.cc rename to mace/ops/sgemm_pack_test.cc index 3e7aaa98..3c0f9a20 100644 --- a/mace/kernels/sgemm_pack_test.cc +++ b/mace/ops/sgemm_pack_test.cc @@ -17,10 +17,10 @@ #include #include -#include "mace/kernels/sgemm.h" +#include "mace/ops/sgemm.h" namespace mace { -namespace kernels { +namespace ops { namespace test { namespace { @@ -162,6 +162,6 @@ TEST(SGemmPackTest, UnPack) { } } // namespace test -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/kernels/shape.cc b/mace/ops/shape.cc similarity index 98% rename from mace/kernels/shape.cc rename to mace/ops/shape.cc index 1775f0a0..b981267a 100644 --- 
a/mace/kernels/shape.cc +++ b/mace/ops/shape.cc @@ -15,7 +15,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class ShapeOp : public Operation { @@ -70,5 +70,5 @@ void RegisterShape(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/shape_test.cc b/mace/ops/shape_test.cc index 2b66c7eb..0ad72ead 100644 --- a/mace/ops/shape_test.cc +++ b/mace/ops/shape_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/softmax.cc b/mace/ops/softmax.cc similarity index 98% rename from mace/kernels/softmax.cc rename to mace/ops/softmax.cc index 1ac3ab4d..8f6fb7fd 100644 --- a/mace/kernels/softmax.cc +++ b/mace/ops/softmax.cc @@ -18,15 +18,15 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/fixpoint.h" -#include "mace/kernels/gemmlowp_util.h" +#include "mace/ops/fixpoint.h" +#include "mace/ops/gemmlowp_util.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/softmax.h" -#include "mace/kernels/opencl/buffer/softmax.h" +#include "mace/ops/opencl/image/softmax.h" +#include "mace/ops/opencl/buffer/softmax.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class SoftmaxOp; @@ -394,5 +394,5 @@ void RegisterSoftmax(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/softmax_benchmark.cc b/mace/ops/softmax_benchmark.cc index 482709ad..66e27434 100644 --- a/mace/ops/softmax_benchmark.cc +++ b/mace/ops/softmax_benchmark.cc @@ -14,7 +14,6 @@ #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -46,7 +45,7 
@@ void SoftmaxBenchmark( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Softmax", "SoftmaxBM") .Input("InputImage") diff --git a/mace/ops/softmax_test.cc b/mace/ops/softmax_test.cc index 98b0ad97..69b5dafd 100644 --- a/mace/ops/softmax_test.cc +++ b/mace/ops/softmax_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -61,7 +60,7 @@ void Simple() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Softmax", "SoftmaxTest") .Input("InputImage") @@ -73,7 +72,7 @@ void Simple() { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } else { @@ -117,7 +116,7 @@ void Complex(const std::vector &logits_shape) { expected->Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Softmax", "SoftmaxTest") .Input("InputImage") @@ -129,7 +128,7 @@ void Complex(const std::vector &logits_shape) { // Transfer output ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(*expected, *net.GetOutput("OPENCLOutput"), 1e-5); } diff --git a/mace/kernels/space_to_batch.cc b/mace/ops/space_to_batch.cc similarity index 99% rename from mace/kernels/space_to_batch.cc rename to mace/ops/space_to_batch.cc index 41c731c5..f391bc79 100644 --- 
a/mace/kernels/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/space_to_batch.h" +#include "mace/ops/opencl/image/space_to_batch.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { class SpaceToBatchOpBase : public Operation { public: @@ -343,5 +343,5 @@ void RegisterSpaceToBatchND(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index 565ad5dc..f6d5ad1a 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -49,7 +48,7 @@ void BMSpaceToBatch( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") .Input("InputImage") .Output("OutputImage") diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc index 1d07ecfc..956dedc2 100644 --- a/mace/ops/space_to_batch_test.cc +++ b/mace/ops/space_to_batch_test.cc @@ -33,7 +33,7 @@ void RunSpaceToBatch(const std::vector &input_shape, if (D == GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") .Input("InputImage") .Output("OutputImage") @@ -56,7 +56,7 @@ void RunSpaceToBatch(const std::vector &input_shape, if (D == GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); 
+ ops::BufferType::IN_OUT_CHANNEL); } else if (D == CPU) { net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); @@ -77,7 +77,7 @@ void RunBatchToSpace(const std::vector &input_shape, if (D == GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") @@ -100,7 +100,7 @@ void RunBatchToSpace(const std::vector &input_shape, if (D == GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } else if (D == CPU) { net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); @@ -157,7 +157,7 @@ void TestSpaceToBatchLargeInput(const std::vector &input_shape, // run gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") .Input("InputImage") .Output("OutputImage") @@ -166,7 +166,7 @@ void TestSpaceToBatchLargeInput(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); net.RunOp(GPU); ImageToBuffer(&net, "OutputImage", "OutputGPU", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // run cpu net.TransformDataFormat("Input", NHWC, "InputNCHW", @@ -194,7 +194,7 @@ void TestoBatchToSpaceLargeInput(const std::vector &input_shape, // run gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") @@ -203,7 +203,7 @@ void TestoBatchToSpaceLargeInput(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); net.RunOp(GPU); ImageToBuffer(&net, "OutputImage", "OutputGPU", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); // run cpu net.TransformDataFormat("Input", NHWC, "InputNCHW", diff --git 
a/mace/kernels/space_to_depth.cc b/mace/ops/space_to_depth.cc similarity index 97% rename from mace/kernels/space_to_depth.cc rename to mace/ops/space_to_depth.cc index e2e302e6..11e5ade3 100644 --- a/mace/kernels/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/space_to_depth.h" +#include "mace/ops/opencl/image/space_to_depth.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class SpaceToDepthOp : public Operation { @@ -125,5 +125,5 @@ void RegisterSpaceToDepth(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/space_to_depth_benchmark.cc b/mace/ops/space_to_depth_benchmark.cc index 480a0421..04760c54 100644 --- a/mace/ops/space_to_depth_benchmark.cc +++ b/mace/ops/space_to_depth_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -44,7 +43,7 @@ void SpaceToDepth( .Finalize(net.NewOperatorDef()); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToDepth", "SpaceToDepthBM") .Input("InputImage") diff --git a/mace/ops/space_to_depth_test.cc b/mace/ops/space_to_depth_test.cc index 76569492..e7ae77d6 100644 --- a/mace/ops/space_to_depth_test.cc +++ b/mace/ops/space_to_depth_test.cc @@ -15,7 +15,6 @@ #include #include -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -47,7 +46,7 @@ void RunSpaceToDepth(const std::vector &input_shape, } else { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToDepth", "SpaceToDepthTest") .Input("InputImage") .Output("OutputImage") @@ -59,7 +58,7 @@ void RunSpaceToDepth(const std::vector &input_shape, if (D == DeviceType::GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto expected = net.CreateTensor(expected_shape, expected_data); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); @@ -129,7 +128,7 @@ void RandomTest(const int block_size, NHWC); BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToDepth", "SpaceToDepthTest") .Input("InputImg") @@ -142,7 +141,7 @@ void RandomTest(const int block_size, net.RunOp(D); ImageToBuffer(&net, "OutputImg", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*net.GetTensor("Output"), diff --git a/mace/kernels/split.cc b/mace/ops/split.cc similarity index 98% rename from 
mace/kernels/split.cc rename to mace/ops/split.cc index 68f5f274..1d632329 100644 --- a/mace/kernels/split.cc +++ b/mace/ops/split.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/split.h" +#include "mace/ops/opencl/image/split.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class SplitOp; @@ -129,5 +129,5 @@ void RegisterSplit(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/split_benchmark.cc b/mace/ops/split_benchmark.cc index aa0e8fba..687fc573 100644 --- a/mace/ops/split_benchmark.cc +++ b/mace/ops/split_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -40,7 +39,7 @@ void BMSplitHelper(int iters, if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto builder = OpDefBuilder("Split", "SplitTest"); builder.Input("InputImage"); diff --git a/mace/ops/split_test.cc b/mace/ops/split_test.cc index d42b3716..906a47dd 100644 --- a/mace/ops/split_test.cc +++ b/mace/ops/split_test.cc @@ -50,7 +50,7 @@ void RandomTest(const int num_outputs, const int axis) { if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto builder = OpDefBuilder("Split", "SplitTest"); builder.Input("InputImage"); @@ -75,7 +75,7 @@ void RandomTest(const int num_outputs, const int axis) { for (int i = 0; i < num_outputs; ++i) { ImageToBuffer(&net, MakeString("OutputImage", i), MakeString("Output", i), - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } } diff --git 
a/mace/kernels/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc similarity index 97% rename from mace/kernels/sqrdiff_mean.cc rename to mace/ops/sqrdiff_mean.cc index e9c7bde0..f25d66c1 100644 --- a/mace/kernels/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -17,11 +17,11 @@ #include "mace/core/operator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/kernels/opencl/image/sqrdiff_mean.h" +#include "mace/ops/opencl/image/sqrdiff_mean.h" #endif // MACE_ENABLE_OPENCL namespace mace { -namespace kernels { +namespace ops { template class SqrDiffMeanOp : public Operation { @@ -114,5 +114,5 @@ void RegisterSqrDiffMean(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/sqrdiff_mean_benchmark.cc b/mace/ops/sqrdiff_mean_benchmark.cc index f3bfd44c..bcf07500 100644 --- a/mace/ops/sqrdiff_mean_benchmark.cc +++ b/mace/ops/sqrdiff_mean_benchmark.cc @@ -34,9 +34,9 @@ void SqrDiffMean(int iters, int batch, int channels, if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImage1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SqrDiffMean", "SqrDiffMeanBM") .Input("InputImage") .Input("InputImage1") diff --git a/mace/ops/sqrdiff_mean_test.cc b/mace/ops/sqrdiff_mean_test.cc index e88810bc..66f852b7 100644 --- a/mace/ops/sqrdiff_mean_test.cc +++ b/mace/ops/sqrdiff_mean_test.cc @@ -59,9 +59,9 @@ void Simple(const std::vector &input_shape0, NHWC); } else { BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SqrDiffMean", "SqrDiffMeanTest") .Input("InputImg0") .Input("InputImg1") @@ -70,7 +70,7 @@ void Simple(const std::vector 
&input_shape0, // Run net.RunOp(D); ImageToBuffer(&net, "OutputImg", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); } auto expected = net.CreateTensor(output_shape, output); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5, 1e-3); @@ -127,9 +127,9 @@ void RandomTest(const std::vector &input_shape0, net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SqrDiffMean", "SqrDiffMeanTest") .Input("InputImg0") .Input("InputImg1") @@ -138,7 +138,7 @@ void RandomTest(const std::vector &input_shape0, // Run net.RunOp(D); ImageToBuffer(&net, "OutputImg", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*net.GetTensor("Output"), *net.GetOutput("OPENCLOutput"), 1e-4, 1e-3); diff --git a/mace/kernels/squeeze.cc b/mace/ops/squeeze.cc similarity index 97% rename from mace/kernels/squeeze.cc rename to mace/ops/squeeze.cc index 8221bccb..37ee3d70 100644 --- a/mace/kernels/squeeze.cc +++ b/mace/ops/squeeze.cc @@ -18,7 +18,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class SqueezeOp : public Operation { @@ -59,5 +59,5 @@ void RegisterSqueeze(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/squeeze_test.cc b/mace/ops/squeeze_test.cc index 166d9868..1bcd6c37 100644 --- a/mace/ops/squeeze_test.cc +++ b/mace/ops/squeeze_test.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "gmock/gmock.h" -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/stack.cc b/mace/ops/stack.cc similarity index 98% rename from mace/kernels/stack.cc rename to mace/ops/stack.cc index b3fc8bea..de795965 100644 --- a/mace/kernels/stack.cc +++ b/mace/ops/stack.cc @@ -18,7 +18,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class StackOp : public Operation { @@ -85,5 +85,5 @@ void RegisterStack(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/stack_test.cc b/mace/ops/stack_test.cc index e55ff278..d63de70a 100644 --- a/mace/ops/stack_test.cc +++ b/mace/ops/stack_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/strided_slice.cc b/mace/ops/strided_slice.cc similarity index 99% rename from mace/kernels/strided_slice.cc rename to mace/ops/strided_slice.cc index b030661b..7c60bfe8 100644 --- a/mace/kernels/strided_slice.cc +++ b/mace/ops/strided_slice.cc @@ -18,7 +18,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class StridedSliceOp : public Operation { @@ -225,5 +225,5 @@ void RegisterStridedSlice(OpRegistryBase *op_registry) { #endif // MACE_ENABLE_OPENCL } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/strided_slice_test.cc b/mace/ops/strided_slice_test.cc index c13a813c..3ecbedc9 100644 --- a/mace/ops/strided_slice_test.cc +++ b/mace/ops/strided_slice_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/transpose.cc b/mace/ops/transpose.cc similarity index 99% rename from mace/kernels/transpose.cc rename to mace/ops/transpose.cc index 2ec38015..4e98944c 100644 --- a/mace/kernels/transpose.cc +++ b/mace/ops/transpose.cc @@ -22,7 +22,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { static void TransposeNHWCToNCHWC3(const float *input, float *output, @@ -232,5 +232,5 @@ void RegisterTranspose(OpRegistryBase *op_registry) { DeviceType::CPU, float); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/transpose_benchmark.cc b/mace/ops/transpose_benchmark.cc index 6d37b93c..f584239a 100644 --- a/mace/ops/transpose_benchmark.cc +++ b/mace/ops/transpose_benchmark.cc @@ -15,7 +15,6 @@ #include #include -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/mace/ops/transpose_test.cc b/mace/ops/transpose_test.cc index 44ef0ec2..d9f227c3 100644 --- a/mace/ops/transpose_test.cc +++ b/mace/ops/transpose_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/kernels/unstack.cc b/mace/ops/unstack.cc similarity index 98% rename from mace/kernels/unstack.cc rename to mace/ops/unstack.cc index 8403b8f6..ee0a4369 100644 --- a/mace/kernels/unstack.cc +++ b/mace/ops/unstack.cc @@ -18,7 +18,7 @@ #include "mace/core/operator.h" namespace mace { -namespace kernels { +namespace ops { template class UnstackOp : public Operation { @@ -80,5 +80,5 @@ void RegisterUnstack(OpRegistryBase *op_registry) { DeviceType::CPU, int32_t); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git a/mace/ops/unstack_test.cc b/mace/ops/unstack_test.cc index 4c9774ff..92a6bd61 100644 --- a/mace/ops/unstack_test.cc +++ b/mace/ops/unstack_test.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/mace/ops/winograd_convolution_benchmark.cc b/mace/ops/winograd_convolution_benchmark.cc index 3b126f07..62485165 100644 --- a/mace/ops/winograd_convolution_benchmark.cc +++ b/mace/ops/winograd_convolution_benchmark.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -34,15 +33,15 @@ void BMWinogradConvolution( net.AddRandomInput("Bias", {out_channels}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); - BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + ops::BufferType::CONV2D_FILTER); + BufferToImage(&net, "Bias", "BiasImage", ops::BufferType::ARGUMENT); // Winograd convolution // transform filter BufferToImage(&net, "Filter", "WinoFilter", - kernels::BufferType::WINOGRAD_FILTER, block_size); + ops::BufferType::WINOGRAD_FILTER, block_size); // Inference convolution output shape OpDefBuilder("InferConv2dShape", "InferConv2dShapeTest") diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc index 1c82a189..4d015194 100644 --- a/mace/ops/winograd_convolution_test.cc +++ b/mace/ops/winograd_convolution_test.cc @@ -14,8 +14,7 @@ #include -#include "mace/core/op_def_registry.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -44,10 +43,10 @@ void WinogradConvolution(const index_t batch, net.AddRandomInput("Bias", {out_channels}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); - BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + ops::BufferType::CONV2D_FILTER); + BufferToImage(&net, "Bias", "BiasImage", ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -63,7 +62,7 @@ void 
WinogradConvolution(const index_t batch, // Transfer output ImageToBuffer(&net, "OutputImage", "ConvOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto expected = net.CreateTensor(); expected->Copy(*net.GetOutput("ConvOutput")); @@ -72,7 +71,7 @@ void WinogradConvolution(const index_t batch, // Winograd convolution // transform filter BufferToImage(&net, "Filter", "WinoFilter", - kernels::BufferType::WINOGRAD_FILTER, block_size); + ops::BufferType::WINOGRAD_FILTER, block_size); // transform input OpDefBuilder("WinogradTransform", "WinogradTransformTest") .Input("InputImage") @@ -123,7 +122,7 @@ void WinogradConvolution(const index_t batch, net.Sync(); ImageToBuffer(&net, "WinoOutputImage", "WinoOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("WinoOutput"), 1e-2, 1e-2); @@ -195,10 +194,10 @@ void WinogradConvolutionWithPad(const index_t batch, net.AddRandomInput("Bias", {out_channels}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); - BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + ops::BufferType::CONV2D_FILTER); + BufferToImage(&net, "Bias", "BiasImage", ops::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -214,7 +213,7 @@ void WinogradConvolutionWithPad(const index_t batch, // Transfer output ImageToBuffer(&net, "OutputImage", "ConvOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); auto expected = net.CreateTensor(); expected->Copy(*net.GetOutput("ConvOutput")); auto output_shape = expected->shape(); @@ -222,7 +221,7 @@ void WinogradConvolutionWithPad(const index_t batch, // Winograd convolution // transform filter 
BufferToImage(&net, "Filter", "WinoFilter", - kernels::BufferType::WINOGRAD_FILTER, block_size); + ops::BufferType::WINOGRAD_FILTER, block_size); // transform input OpDefBuilder("WinogradTransform", "WinogradTransformTest") .Input("InputImage") @@ -273,7 +272,7 @@ void WinogradConvolutionWithPad(const index_t batch, net.Sync(); ImageToBuffer(&net, "WinoOutputImage", "WinoOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("WinoOutput"), 1e-2, 1e-2); diff --git a/mace/kernels/winograd_transform.cc b/mace/ops/winograd_transform.cc similarity index 93% rename from mace/kernels/winograd_transform.cc rename to mace/ops/winograd_transform.cc index 286bff95..b2635f4d 100644 --- a/mace/kernels/winograd_transform.cc +++ b/mace/ops/winograd_transform.cc @@ -16,12 +16,12 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/activation.h" -#include "mace/kernels/conv_pool_2d_util.h" -#include "mace/kernels/opencl/image/winograd_transform.h" +#include "mace/ops/activation.h" +#include "mace/ops/conv_pool_2d_util.h" +#include "mace/ops/opencl/image/winograd_transform.h" namespace mace { -namespace kernels { +namespace ops { template class WinogradTransformOp; @@ -62,7 +62,7 @@ class WinogradInverseTransformOp : public Operation { public: explicit WinogradInverseTransformOp(OpConstructContext *context) : Operation(context) { - ActivationType activation = kernels::StringToActivationType( + ActivationType activation = ops::StringToActivationType( Operation::GetOptionalArg("activation", "NOOP")); float relux_max_limit = Operation::GetOptionalArg("max_limit", 0.0f); int block_size = Operation::GetOptionalArg("wino_block_size", 2); @@ -98,5 +98,5 @@ void RegisterWinogradInverseTransform( WinogradInverseTransformOp, DeviceType::GPU, half); } -} // namespace kernels +} // namespace ops } // namespace mace diff --git 
a/mace/ops/winograd_transform_benchmark.cc b/mace/ops/winograd_transform_benchmark.cc index 5c21c9ad..bb6679bb 100644 --- a/mace/ops/winograd_transform_benchmark.cc +++ b/mace/ops/winograd_transform_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_def_registry.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -30,7 +29,7 @@ void BMWinogradTransform( net.AddRandomInput("Input", {batch, height, width, channels}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + ops::BufferType::IN_OUT_CHANNEL); OpDefBuilder("WinogradTransform", "WinogradTransformTest") .Input("InputImage") .Output("OutputImage") @@ -88,7 +87,7 @@ void BMWinogradInverseTransform( (block_size + 2), channels, p, 1}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); OpDefBuilder("WinogradInverseTransform", "WinogradInverseTransformTest") .Input("InputImage") .AddIntArg("batch", batch) @@ -155,7 +154,7 @@ void WinoFilterBufferToImage(int iters, OpDefBuilder("BufferToImage", "BufferToImageTest") .Input("Input") .Output("Output") - .AddIntArg("buffer_type", kernels::BufferType::WINOGRAD_FILTER) + .AddIntArg("buffer_type", ops::BufferType::WINOGRAD_FILTER) .AddIntArg("wino_block_size", wino_block_size) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); @@ -215,9 +214,9 @@ void WinoMatMulBenchmark( net.AddRandomInput("B", {batch, in_channels, out_width}); if (D == DeviceType::GPU) { - BufferToImage(&net, "A", "AImage", kernels::BufferType::IN_OUT_WIDTH); + BufferToImage(&net, "A", "AImage", ops::BufferType::IN_OUT_WIDTH); BufferToImage(&net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + ops::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulBM") .Input("AImage") diff --git a/mace/proto/mace.proto 
b/mace/proto/mace.proto index 4b789000..a3064df0 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -122,7 +122,6 @@ message NetDef { repeated OperatorDef op = 1; repeated Argument arg = 2; repeated ConstTensor tensors = 3; - repeated string op_types = 4; // for mem optimization optional MemoryArena mem_arena = 10; diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index 92a6b12d..a89e3abd 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -214,9 +214,6 @@ def main(unused_args): for arg in cpu_graph_def.arg: if arg.name not in output_graph_arg_names: output_graph_def.arg.extend(arg) - for op_type in cpu_graph_def.op_types: - if op_type not in output_graph_def.op_types: - output_graph_def.op_types.extend([op_type]) print("Merge done") else: option.device = device_type_map[FLAGS.runtime] diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 8d3a3b64..1ab81452 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -1712,14 +1712,6 @@ class Transformer(base_converter.ConverterInterface): ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_FLOAT) - def add_op_types(self): - net = self._model - op_types = set() - for op in net.op: - op_types.add(op.type) - for op_type in op_types: - net.op_types.extend([op_type]) - def sort_by_execution(self): print("Sort by execution") net = self._model @@ -1736,8 +1728,6 @@ class Transformer(base_converter.ConverterInterface): del net.op[:] net.op.extend(sorted_nodes) - self.add_op_types() - print("Final ops:") for op in net.op: print("%s (%s): %s" % (op.name, op.type, [ diff --git a/mace/python/tools/encrypt_opencl_codegen.py b/mace/python/tools/encrypt_opencl_codegen.py index 957c8a51..776dc506 100644 --- a/mace/python/tools/encrypt_opencl_codegen.py +++ b/mace/python/tools/encrypt_opencl_codegen.py @@ -19,7 +19,7 @@ import sys import 
jinja2 -# python encrypt_opencl_codegen.py --cl_kernel_dir=./mace/kernels/opencl/cl/ \ +# python encrypt_opencl_codegen.py --cl_kernel_dir=./mace/ops/opencl/cl/ \ # --output_path=./mace/codegen/opencl_encrypt/opencl_encrypted_program.cc FLAGS = None @@ -96,7 +96,7 @@ def parse_args(): parser.add_argument( "--cl_kernel_dir", type=str, - default="./mace/kernels/opencl/cl/", + default="./mace/ops/opencl/cl/", help="The cl kernels directory.") parser.add_argument( "--output_path", diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2 index ec1ba284..3f4ba1c4 100644 --- a/mace/python/tools/model.jinja2 +++ b/mace/python/tools/model.jinja2 @@ -122,12 +122,6 @@ void CreateTensors(NetDef *net_def) { {% endfor %} } -void CreateOpTypes(NetDef *net_def) { - {% for op_type in net.op_types %} - net_def->add_op_types({{ op_type|tojson }}); - {% endfor %} -} - {% if net.mem_arena.mem_block|length != 0 %} void CreateMemoryArena(mace::MemoryArena *mem_arena) { mem_arena->mutable_mem_block()->Reserve({{ net.mem_arena.mem_block|length }}); @@ -168,9 +162,6 @@ const std::shared_ptr CreateNet() { {% if net.output_info | length > 0 %} CreateOutputInfo(net_def.get()); {% endif %} - {% if net.op_types|length > 0 %} - CreateOpTypes(net_def.get()); - {% endif %} return net_def; } diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index 0f8d1f49..0bb8342d 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -15,8 +15,8 @@ #include #include // NOLINT(build/c++11) -#include "mace/core/op_def_registry.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/core/operator.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { @@ -306,7 +306,7 @@ void MaceRunFunc(const int in_out_size) { std::string input_name = MakeString("mace_input_node_", input_names[i]); BufferToImage(input_name, input_names[i], - mace::kernels::IN_OUT_CHANNEL, + mace::ops::IN_OUT_CHANNEL, 
{mem_map[input_names[i]]}, device, net_def.get()); @@ -314,7 +314,7 @@ void MaceRunFunc(const int in_out_size) { info->set_name(input_names[i]); } BufferToImage(filter_tensor_name, filter_tensor_img_name, - mace::kernels::CONV2D_FILTER, {}, device, + mace::ops::CONV2D_FILTER, {}, device, net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, @@ -326,15 +326,12 @@ void MaceRunFunc(const int in_out_size) { std::string output_name = MakeString("mace_output_node_", output_names[i]); ImageToBuffer(output_names[i], output_name, - mace::kernels::IN_OUT_CHANNEL, + mace::ops::IN_OUT_CHANNEL, device, net_def.get()); OutputInfo *info = net_def->add_output_info(); info->set_name(output_names[i]); } - for (int i = 0; i < net_def->op_size(); ++i) { - net_def->add_op_types(net_def->op(i).type()); - } MaceEngineConfig config(DeviceType::GPU); diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index 54dd99b7..127e5849 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -15,8 +15,7 @@ #include -#include "mace/core/op_def_registry.h" -#include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" #include "mace/public/mace.h" @@ -308,7 +307,7 @@ void MaceRun(const int in_out_size, std::string input_name = MakeString("mace_input_node_", input_names[i]); BufferToImage(input_name, input_names[i], - mace::kernels::IN_OUT_CHANNEL, + mace::ops::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, device, net_def.get()); @@ -316,7 +315,7 @@ void MaceRun(const int in_out_size, info->set_name(input_names[i]); } BufferToImage(filter_tensor_name, filter_tensor_img_name, - mace::kernels::CONV2D_FILTER, {}, device, + mace::ops::CONV2D_FILTER, {}, device, net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, @@ -327,17 +326,13 @@ void MaceRun(const int 
in_out_size, std::string output_name = MakeString("mace_output_node_", output_names[i]); ImageToBuffer(output_names[i], output_name, - mace::kernels::IN_OUT_CHANNEL, + mace::ops::IN_OUT_CHANNEL, device, net_def.get()); OutputInfo *info = net_def->add_output_info(); info->set_name(output_names[i]); } - for (int i = 0; i < net_def->op_size(); ++i) { - net_def->add_op_types(net_def->op(i).type()); - } - MaceEngineConfig config(DeviceType::GPU); MaceEngine engine(config); diff --git a/repository/opencl-kernel/opencl_kernel_configure.bzl b/repository/opencl-kernel/opencl_kernel_configure.bzl index 97c9639c..c844a5d3 100644 --- a/repository/opencl-kernel/opencl_kernel_configure.bzl +++ b/repository/opencl-kernel/opencl_kernel_configure.bzl @@ -18,51 +18,51 @@ def _opencl_encrypt_kernel_impl(repository_ctx): unused_var = repository_ctx.path(Label("//:.git/refs/heads/master")) ret = repository_ctx.execute( - ["test", "-f", "%s/mace/kernels/opencl/cl/common.h" % mace_root_path]) + ["test", "-f", "%s/mace/ops/opencl/cl/common.h" % mace_root_path]) if ret.return_code == 0: - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/activation.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/addn.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/batch_norm.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/batch_to_space.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/bias_add.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/buffer_to_image.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/buffer_transform.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/channel_shuffle.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/common.h")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/concat.cl")) - unused_var = 
repository_ctx.path(Label("//:mace/kernels/opencl/cl/conv_2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/conv_2d_1x1.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/conv_2d_1x1_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/conv_2d_3x3.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/conv_2d_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/crop.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/deconv_2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/depth_to_space.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/depthwise_conv2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/depthwise_conv2d_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/eltwise.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/fully_connected.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/lstmcell.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/matmul.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/pad.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/pooling.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/pooling_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/reduce_mean.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/resize_bicubic.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/resize_bilinear.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/split.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/softmax.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/softmax_buffer.cl")) - unused_var = 
repository_ctx.path(Label("//:mace/kernels/opencl/cl/space_to_batch.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/space_to_depth.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/sqrdiff_mean.cl")) - unused_var = repository_ctx.path(Label("//:mace/kernels/opencl/cl/winograd_transform.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/activation.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/addn.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_norm.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_to_space.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/bias_add.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_to_image.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_transform.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/channel_shuffle.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/common.h")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/concat.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_3x3.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/crop.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/deconv_2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depth_to_space.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl")) + 
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/eltwise.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/reduce_mean.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bicubic.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bilinear.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/split.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_batch.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_depth.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/sqrdiff_mean.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/winograd_transform.cl")) python_bin_path = repository_ctx.which("python") repository_ctx.execute([ python_bin_path, '%s/mace/python/tools/encrypt_opencl_codegen.py' % mace_root_path, - '--cl_kernel_dir=%s/mace/kernels/opencl/cl' % mace_root_path, + '--cl_kernel_dir=%s/mace/ops/opencl/cl' % mace_root_path, '--output_path=%s/encrypt_opencl_kernel' % generated_files_path ], quiet=False) diff --git a/tools/bazel.rc b/tools/bazel.rc index bd1f79c7..1863738e 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -32,6 +32,7 @@ build:arm_linux --copt -Wno-ignored-attributes build:arm_linux --copt -Wno-unused-function build:arm_linux 
--copt -Wno-sequence-point build:arm_linux --copt -Wno-implicit-fallthrough +build:arm_linux --copt -Wno-psabi # Usage example: bazel build --config aarch64_linux build:aarch64_linux --config=cross_compile diff --git a/tools/converter.py b/tools/converter.py index 86bedae9..4ad1a8b5 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -673,7 +673,7 @@ def download_file(url, dst, num_retries=3): try: urllib.request.urlretrieve(url, dst) MaceLogger.info('\nDownloaded successfully.') - except (urllib.URLError, urllib.ContentTooShortError) as e: + except urllib.ContentTooShortError as e: MaceLogger.warning('Download error:', e.reason) if num_retries > 0: return download_file(url, dst, num_retries - 1) diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 267dccfc..9c8045f1 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -410,7 +410,7 @@ def bazel_target_to_bin(target): ################################ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): sh.mkdir("-p", "%s/opencl" % codegen_path) - encrypt_opencl_codegen("./mace/kernels/opencl/cl/", + encrypt_opencl_codegen("./mace/ops/opencl/cl/", "mace/codegen/opencl/opencl_encrypt_program.cc") @@ -680,7 +680,7 @@ def create_internal_storage_dir(serialno, phone_data_dir): def push_depended_so_libs(libmace_dynamic_library_path, abi, phone_data_dir, serialno): - dep_so_libs = sh.bash("/opt/android-ndk/ndk-depends", + dep_so_libs = sh.bash(os.environ["ANDROID_NDK_HOME"] + "/ndk-depends", libmace_dynamic_library_path) for dep in split_stdout(dep_so_libs): if dep == "libgnustl_shared.so": -- GitLab