From 2895c3eed2b4804f28b8b09dfb1d3f1c908d1f5d Mon Sep 17 00:00:00 2001 From: luxuhui Date: Fri, 3 Apr 2020 20:36:52 +0800 Subject: [PATCH] refactor: refactor op base module and op delegator mechanism N/A Signed-off-by: Luxuhui --- docs/development/adding_a_new_op.md | 8 +- docs/user_guide/advanced_usage.rst | 2 +- docs/user_guide/advanced_usage_cmake.rst | 37 +- mace/core/BUILD.bazel | 6 +- mace/core/CMakeLists.txt | 11 +- mace/core/net.cc | 9 +- mace/core/net.h | 5 +- mace/core/net_def_adapter.cc | 6 +- mace/core/net_def_adapter.h | 13 +- mace/core/operator.cc | 371 ------------------ mace/core/operator.h | 358 ----------------- mace/core/ops/op_condition_builder.cc | 59 +++ mace/core/ops/op_condition_builder.h | 53 +++ mace/core/ops/op_condition_context.cc | 104 +++++ mace/core/ops/op_condition_context.h | 94 +++++ mace/core/ops/op_construct_context.cc | 29 ++ mace/core/ops/op_construct_context.h | 73 ++++ mace/core/{ => ops}/op_context.cc | 2 +- mace/core/{ => ops}/op_context.h | 7 +- mace/core/ops/op_delegator.h | 58 +++ mace/core/ops/op_init_context.cc | 22 ++ mace/core/ops/op_init_context.h | 47 +++ mace/core/ops/operator.cc | 68 ++++ mace/core/ops/operator.h | 120 ++++++ mace/core/registry/op_delegator_registry.cc | 39 ++ mace/core/registry/op_delegator_registry.h | 94 +++++ mace/core/registry/op_registration_info.cc | 69 ++++ mace/core/registry/op_registration_info.h | 56 +++ mace/core/registry/ops_registry.cc | 149 +++++++ mace/core/registry/ops_registry.h | 99 +++++ mace/core/workspace.cc | 25 +- mace/core/workspace.h | 10 +- mace/libmace/mace.cc | 12 +- mace/ops/BUILD.bazel | 22 +- mace/ops/CMakeLists.txt | 17 +- mace/ops/activation.cc | 44 +-- mace/ops/activation.h | 2 +- mace/ops/addn.cc | 5 +- mace/ops/argmax.cc | 5 +- mace/ops/arm/fp32/activation.cc | 35 +- mace/ops/arm/fp32/activation.h | 53 --- mace/ops/arm/fp32/bias_add.cc | 19 +- mace/ops/arm/fp32/bias_add.h | 48 --- mace/ops/arm/fp32/conv_2d.h | 28 +- mace/ops/arm/fp32/conv_2d_1x1.cc | 24 +- mace/ops/arm/fp32/conv_2d_1x1.h | 51 --- mace/ops/arm/fp32/conv_2d_1xn.cc | 15 + mace/ops/arm/fp32/conv_2d_1xn.h | 21 +- mace/ops/arm/fp32/conv_2d_3x3.cc | 7 + mace/ops/arm/fp32/conv_2d_3x3.h | 13 +- mace/ops/arm/fp32/conv_2d_3x3_winograd.cc | 5 + mace/ops/arm/fp32/conv_2d_3x3_winograd.h | 13 +- mace/ops/arm/fp32/conv_2d_5x5.cc | 21 +- mace/ops/arm/fp32/conv_2d_5x5.h | 48 --- mace/ops/arm/fp32/conv_2d_7x7.cc | 9 + mace/ops/arm/fp32/conv_2d_7x7.h | 17 +- mace/ops/arm/fp32/conv_general.cc | 21 +- mace/ops/arm/fp32/conv_general.h | 50 --- mace/ops/arm/fp32/deconv_2d.h | 47 +-- mace/ops/arm/fp32/deconv_2d_2x2.cc | 8 +- mace/ops/arm/fp32/deconv_2d_2x2.h | 16 +- mace/ops/arm/fp32/deconv_2d_3x3.cc | 7 + mace/ops/arm/fp32/deconv_2d_3x3.h | 16 +- mace/ops/arm/fp32/deconv_2d_4x4.cc | 8 +- mace/ops/arm/fp32/deconv_2d_4x4.h | 16 +- mace/ops/arm/fp32/deconv_2d_general.cc | 20 +- mace/ops/arm/fp32/deconv_2d_general.h | 60 --- mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc | 7 + mace/ops/arm/fp32/depthwise_conv_2d_3x3.h | 16 +- mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc | 14 + mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h | 52 +-- mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc | 14 + mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h | 51 +-- .../arm/fp32/depthwise_deconv_2d_general.cc | 8 + .../arm/fp32/depthwise_deconv_2d_general.h | 32 +- mace/ops/arm/fp32/gemm.cc | 3 + mace/ops/arm/fp32/gemm.h | 18 +- mace/ops/arm/fp32/gemv.cc | 4 + mace/ops/arm/fp32/gemv.h | 11 +- mace/ops/arm/q8/eltwise.cc | 18 +- mace/ops/arm/q8/eltwise.h | 48 --- 
mace/ops/arm/q8/gemv.cc | 8 + mace/ops/arm/q8/gemv.h | 19 +- mace/ops/arm/q8/quantize.cc | 7 +- mace/ops/batch_norm.cc | 35 +- mace/ops/batch_to_space.cc | 5 +- mace/ops/bias_add.cc | 27 +- mace/ops/cast.cc | 5 +- mace/ops/channel_shuffle.cc | 5 +- mace/ops/common/lstm.h | 2 +- mace/ops/common/transpose.h | 2 +- mace/ops/concat.cc | 5 +- mace/ops/conv_2d.cc | 230 +++++------ mace/ops/conv_pool_2d_base.h | 2 +- mace/ops/crop.cc | 5 +- mace/ops/cumsum.cc | 5 +- mace/ops/deconv_2d.cc | 159 ++++---- mace/ops/deconv_2d.h | 2 +- mace/ops/delegator/activation.h | 61 +++ mace/ops/delegator/bias_add.h | 43 ++ mace/ops/delegator/conv_2d.h | 90 +++++ mace/ops/delegator/deconv_2d.h | 95 +++++ .../depthwise_conv_2d.h} | 39 +- mace/ops/delegator/depthwise_deconv_2d.h | 35 ++ mace/ops/delegator/eltwise.h | 57 +++ mace/ops/delegator/gemm.h | 77 ++++ mace/ops/delegator/gemv.h | 52 +++ mace/ops/depth_to_space.cc | 5 +- mace/ops/depthwise_conv2d.cc | 115 +++--- mace/ops/depthwise_deconv2d.cc | 202 ++++------ mace/ops/dynamic_lstm.cc | 78 ++-- mace/ops/eltwise.cc | 102 +---- mace/ops/expand_dims.cc | 5 +- mace/ops/extract_pooling.cc | 5 +- mace/ops/fill.cc | 5 +- mace/ops/fully_connected.cc | 78 ++-- mace/ops/gather.cc | 5 +- mace/ops/identity.cc | 5 +- mace/ops/ifdefined.cc | 5 +- mace/ops/infer_conv2d_shape.cc | 5 +- mace/ops/kaldi_batch_norm.cc | 5 +- mace/ops/local_response_norm.cc | 5 +- mace/ops/lpnorm.cc | 5 +- mace/ops/lstm_nonlinear.cc | 5 +- mace/ops/matmul.cc | 116 +++--- mace/ops/mvnorm.cc | 5 +- mace/ops/one_hot.cc | 5 +- mace/ops/opencl/buffer/buffer_transform.h | 2 +- .../opencl/buffer/buffer_type_transform.cc | 2 +- mace/ops/opencl/buffer/conv_2d_1x1.cc | 2 +- mace/ops/opencl/buffer/conv_2d_general.cc | 2 +- mace/ops/opencl/buffer/reshape.cc | 2 +- mace/ops/opencl/buffer/softmax.h | 2 +- mace/ops/opencl/buffer/utils.h | 2 +- mace/ops/opencl/buffer_transform.cc | 5 +- mace/ops/opencl/buffer_transformer.h | 3 +- mace/ops/opencl/image/activation.h | 2 +- mace/ops/opencl/image/addn.h | 2 +- mace/ops/opencl/image/batch_norm.h | 2 +- mace/ops/opencl/image/batch_to_space.h | 2 +- mace/ops/opencl/image/bias_add.h | 2 +- mace/ops/opencl/image/buffer_to_image.h | 2 +- mace/ops/opencl/image/channel_shuffle.h | 2 +- mace/ops/opencl/image/concat.h | 2 +- mace/ops/opencl/image/conv_2d.h | 2 +- mace/ops/opencl/image/conv_2d_1x1.cc | 2 +- mace/ops/opencl/image/conv_2d_3x3.cc | 2 +- mace/ops/opencl/image/conv_2d_general.cc | 2 +- mace/ops/opencl/image/crop.h | 2 +- mace/ops/opencl/image/deconv_2d.h | 2 +- mace/ops/opencl/image/depth_to_space.h | 2 +- mace/ops/opencl/image/depthwise_conv2d.h | 2 +- mace/ops/opencl/image/depthwise_deconv2d.h | 2 +- mace/ops/opencl/image/eltwise.h | 2 +- mace/ops/opencl/image/fully_connected.h | 2 +- mace/ops/opencl/image/image_to_buffer.h | 2 +- mace/ops/opencl/image/lpnorm.h | 2 +- mace/ops/opencl/image/lstm_cell.h | 2 +- mace/ops/opencl/image/matmul.h | 2 +- mace/ops/opencl/image/mvnorm.h | 2 +- mace/ops/opencl/image/pad.h | 2 +- mace/ops/opencl/image/pooling.h | 2 +- mace/ops/opencl/image/reduce.h | 2 +- mace/ops/opencl/image/reshape.h | 2 +- mace/ops/opencl/image/resize_bicubic.h | 2 +- mace/ops/opencl/image/resize_bilinear.h | 2 +- .../opencl/image/resize_nearest_neighbor.h | 2 +- mace/ops/opencl/image/softmax.h | 2 +- mace/ops/opencl/image/space_to_batch.h | 2 +- mace/ops/opencl/image/space_to_depth.h | 2 +- mace/ops/opencl/image/split.h | 2 +- mace/ops/opencl/image/sqrdiff_mean.h | 2 +- mace/ops/opencl/image/winograd_conv2d.cc | 2 +- mace/ops/opencl/lstm_cell.cc | 5 +- 
mace/ops/pad.cc | 5 +- mace/ops/pad_context.cc | 5 +- mace/ops/pnorm.cc | 5 +- mace/ops/pooling.cc | 5 +- mace/ops/prior_box.cc | 5 +- mace/ops/reduce.cc | 5 +- mace/ops/ref/activation.cc | 25 +- mace/ops/ref/activation.h | 51 --- mace/ops/ref/bias_add.cc | 18 +- mace/ops/ref/conv_2d.cc | 4 + mace/ops/ref/conv_2d.h | 45 +-- mace/ops/ref/deconv_2d.cc | 5 + mace/ops/ref/deconv_2d.h | 51 +-- mace/ops/ref/depthwise_conv_2d.cc | 5 + mace/ops/ref/depthwise_conv_2d.h | 45 +-- mace/ops/ref/depthwise_deconv_2d.cc | 5 + mace/ops/ref/depthwise_deconv_2d.h | 91 +---- mace/ops/ref/gemm.cc | 4 + mace/ops/ref/gemm.h | 19 +- mace/ops/ref/gemv.cc | 8 + mace/ops/ref/gemv.h | 29 +- mace/ops/ref/q8/eltwise.cc | 116 ++++++ mace/ops/registry/op_delegators_registry.cc | 170 ++++++++ mace/ops/registry/ops_registry.cc | 284 +++++++------- .../registry/{ops_registry.h => registry.h} | 20 +- mace/ops/replace_index.cc | 5 +- mace/ops/reshape.cc | 5 +- mace/ops/resize_bicubic.cc | 5 +- mace/ops/resize_bilinear.cc | 5 +- mace/ops/resize_nearest_neighbor.cc | 5 +- mace/ops/reverse.cc | 5 +- mace/ops/scalar_math.cc | 5 +- mace/ops/select.cc | 5 +- mace/ops/shape.cc | 5 +- mace/ops/slice.cc | 5 +- mace/ops/softmax.cc | 5 +- mace/ops/space_to_batch.cc | 5 +- mace/ops/space_to_depth.cc | 5 +- mace/ops/splice.cc | 5 +- mace/ops/split.cc | 5 +- mace/ops/sqrdiff_mean.cc | 5 +- mace/ops/squeeze.cc | 5 +- mace/ops/stack.cc | 5 +- mace/ops/strided_slice.cc | 5 +- mace/ops/subsample.cc | 5 +- mace/ops/sum_group.cc | 5 +- mace/ops/target_rms_norm.cc | 5 +- mace/ops/tile.cc | 5 +- mace/ops/transpose.cc | 5 +- mace/ops/unsqueeze.cc | 5 +- mace/ops/unstack.cc | 5 +- .../mace/ops/depthwise_deconv2d_benchmark.cc | 2 +- .../mace/ops/quantize_benchmark.cc | 2 +- .../mace/ops/sqrdiff_mean_benchmark.cc | 2 +- test/ccunit/mace/ops/arm/fp32/gemm_test.cc | 6 +- test/ccunit/mace/ops/arm/fp32/gemv_test.cc | 8 +- test/ccunit/mace/ops/arm/q8/gemv_test.cc | 14 +- test/ccunit/mace/ops/matmul_test.cc | 3 +- .../ops/opencl/out_of_range_check_test.cc | 4 +- test/ccunit/mace/ops/sqrdiff_mean_test.cc | 2 +- test/ccutils/mace/ops/ops_test_util.h | 14 +- 235 files changed, 3604 insertions(+), 2744 deletions(-) delete mode 100644 mace/core/operator.cc delete mode 100644 mace/core/operator.h create mode 100644 mace/core/ops/op_condition_builder.cc create mode 100644 mace/core/ops/op_condition_builder.h create mode 100644 mace/core/ops/op_condition_context.cc create mode 100644 mace/core/ops/op_condition_context.h create mode 100644 mace/core/ops/op_construct_context.cc create mode 100644 mace/core/ops/op_construct_context.h rename mace/core/{ => ops}/op_context.cc (96%) rename mace/core/{ => ops}/op_context.h (90%) create mode 100644 mace/core/ops/op_delegator.h create mode 100644 mace/core/ops/op_init_context.cc create mode 100644 mace/core/ops/op_init_context.h create mode 100644 mace/core/ops/operator.cc create mode 100644 mace/core/ops/operator.h create mode 100644 mace/core/registry/op_delegator_registry.cc create mode 100644 mace/core/registry/op_delegator_registry.h create mode 100644 mace/core/registry/op_registration_info.cc create mode 100644 mace/core/registry/op_registration_info.h create mode 100644 mace/core/registry/ops_registry.cc create mode 100644 mace/core/registry/ops_registry.h delete mode 100644 mace/ops/arm/fp32/activation.h delete mode 100644 mace/ops/arm/fp32/bias_add.h delete mode 100644 mace/ops/arm/fp32/conv_2d_1x1.h delete mode 100644 mace/ops/arm/fp32/conv_2d_5x5.h delete mode 100644 mace/ops/arm/fp32/conv_general.h delete mode 
100644 mace/ops/arm/fp32/deconv_2d_general.h delete mode 100644 mace/ops/arm/q8/eltwise.h create mode 100644 mace/ops/delegator/activation.h create mode 100644 mace/ops/delegator/bias_add.h create mode 100644 mace/ops/delegator/conv_2d.h create mode 100644 mace/ops/delegator/deconv_2d.h rename mace/ops/{ref/bias_add.h => delegator/depthwise_conv_2d.h} (52%) create mode 100644 mace/ops/delegator/depthwise_deconv_2d.h create mode 100644 mace/ops/delegator/eltwise.h create mode 100644 mace/ops/delegator/gemm.h create mode 100644 mace/ops/delegator/gemv.h delete mode 100644 mace/ops/ref/activation.h create mode 100644 mace/ops/ref/q8/eltwise.cc create mode 100644 mace/ops/registry/op_delegators_registry.cc rename mace/ops/registry/{ops_registry.h => registry.h} (68%) diff --git a/docs/development/adding_a_new_op.md b/docs/development/adding_a_new_op.md index 2bf0af81..4a631dd7 100644 --- a/docs/development/adding_a_new_op.md +++ b/docs/development/adding_a_new_op.md @@ -19,7 +19,7 @@ Define the new Op class in `mace/ops/my_custom_op.cc`. The structure of Op is like the following code. ```c++ -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" namespace mace { namespace ops { @@ -39,7 +39,7 @@ class MyCustomOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterMyCustomOp(OpRegistryBase *op_registry) { +void RegisterMyCustomOp(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp, DeviceType::CPU, float); @@ -63,14 +63,14 @@ namespace ops { ... -extern void RegisterMyCustomOp(OpRegistryBase *op_registry); +extern void RegisterMyCustomOp(OpRegistry *op_registry); ... } // namespace ops -OpRegistry::OpRegistry() : OpRegistryBase() { +OpRegistry::OpRegistry() { // Keep in lexicographical order ... diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index 0f9d7609..dc7344be 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -557,7 +557,7 @@ which will reduce the library size significantly. the final binary just link the } // namespace ops - OpRegistry::OpRegistry() : OpRegistryBase() { + OpRegistry::OpRegistry() { // Just leave the ops used in your models ... diff --git a/docs/user_guide/advanced_usage_cmake.rst b/docs/user_guide/advanced_usage_cmake.rst index 7be5e2f2..23631b93 100644 --- a/docs/user_guide/advanced_usage_cmake.rst +++ b/docs/user_guide/advanced_usage_cmake.rst @@ -370,12 +370,13 @@ the sample code show how to calculate the Top-1 accuracy with imagenet validatio Reduce Library Size ------------------- -Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``, -which will reduce the library size significantly. the final binary just link the registered ops' code. +Remove the registration of the ops and delegators unused for your models in the +``mace/ops/registry/ops_registry.cc`` and ``mace/ops/registry/op_delegators_registry.cc``, +which will reduce the library size significantly. the final binary just link the registered ops and delegators' code. .. code-block:: cpp - #include "mace/ops/ops_register.h" + #include "mace/ops/registry/registry.h" namespace mace { namespace ops { @@ -386,12 +387,38 @@ which will reduce the library size significantly. the final binary just link the } // namespace ops - OpRegistry::OpRegistry() : OpRegistryBase() { + void RegisterAllOps(OpRegistry *registry) { // Just leave the ops used in your models ... - ops::RegisterMyCustomOp(this); + ops::RegisterMyCustomOp(registry); + + ... 
+ + } + + } // namespace mace + +.. code-block:: cpp + + #include "mace/ops/registry/registry.h" + + namespace mace { + namespace ops { + // Just leave the delegators used in your ops + + ... + + } // namespace ops + + + void RegisterAllOpDelegators(OpDelegatorRegistry *registry) { + // Just leave the delegators used in your ops + + ... + + ops::RegisterMyCustomDelegator(registry); ... diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel index 971b2a27..39fc3883 100644 --- a/mace/core/BUILD.bazel +++ b/mace/core/BUILD.bazel @@ -26,6 +26,8 @@ cc_library( srcs = glob( [ "*.cc", + "ops/*.cc", + "registry/*.cc", "runtime/cpu/*.cc", ], exclude = [ @@ -53,6 +55,8 @@ cc_library( hdrs = glob( [ "*.h", + "ops/*.h", + "registry/*.h", "runtime/cpu/*.h", ], exclude = [ @@ -68,7 +72,7 @@ cc_library( ])) + if_hta_enabled(glob([ "runtime/hexagon/*hta*.h", ])) + if_apu_enabled(glob([ - "runtime/apu/*.h" + "runtime/apu/*.h", ])) + if_rpcmem_enabled([ "rpcmem.h", ]), diff --git a/mace/core/CMakeLists.txt b/mace/core/CMakeLists.txt index 25ab20bf..775eca5d 100644 --- a/mace/core/CMakeLists.txt +++ b/mace/core/CMakeLists.txt @@ -8,9 +8,16 @@ set(CORE_SRCS net.cc net_def_adapter.cc net_optimizer.cc - op_context.cc - operator.cc + ops/op_condition_builder.cc + ops/op_condition_context.cc + ops/op_construct_context.cc + ops/op_context.cc + ops/operator.cc + ops/op_init_context.cc quantize.cc + registry/op_delegator_registry.cc + registry/op_registration_info.cc + registry/ops_registry.cc runtime_failure_mock.cc types.cc workspace.cc diff --git a/mace/core/net.cc b/mace/core/net.cc index 78d40dd7..1e116549 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "mace/core/net.h" + #include #include #include @@ -20,8 +22,9 @@ #include "mace/core/future.h" #include "mace/core/memory_optimizer.h" -#include "mace/core/net.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_init_context.h" +#include "mace/core/ops/op_context.h" +#include "mace/core/registry/ops_registry.h" #include "mace/public/mace.h" #include "mace/port/env.h" #include "mace/utils/conf_util.h" @@ -33,7 +36,7 @@ namespace mace { -SerialNet::SerialNet(const OpRegistryBase *op_registry, +SerialNet::SerialNet(const OpRegistry *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, diff --git a/mace/core/net.h b/mace/core/net.h index 18ec5134..f761af13 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -21,13 +21,14 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" namespace mace { class RunMetadata; class Workspace; class MemoryOptimizer; +class OpRegistry; class NetBase { public: @@ -44,7 +45,7 @@ class NetBase { class SerialNet : public NetBase { public: - SerialNet(const OpRegistryBase *op_registry, + SerialNet(const OpRegistry *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, diff --git a/mace/core/net_def_adapter.cc b/mace/core/net_def_adapter.cc index 205dcdbe..7aa084b3 100644 --- a/mace/core/net_def_adapter.cc +++ b/mace/core/net_def_adapter.cc @@ -17,7 +17,9 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/ops/op_condition_context.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #ifdef MACE_ENABLE_OPENCL #include "mace/core/runtime/opencl/opencl_util.h" @@ -82,7 +84,7 @@ void BuildTransposeOpDef( } // namespace -NetDefAdapter::NetDefAdapter(const OpRegistryBase *op_registry, +NetDefAdapter::NetDefAdapter(const OpRegistry *op_registry, const Workspace *ws) : op_registry_(op_registry), ws_(ws) {} diff --git a/mace/core/net_def_adapter.h b/mace/core/net_def_adapter.h index 0268329e..b285a4a5 100644 --- a/mace/core/net_def_adapter.h +++ b/mace/core/net_def_adapter.h @@ -23,14 +23,17 @@ #include "mace/core/types.h" #include "mace/proto/mace.pb.h" #include "mace/port/port.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/net_optimizer.h" namespace mace { -class OpRegistryBase; -class Workspace; class Device; +class OpConditionContext; +class OperatorDef; +class OpRegistry; +class Workspace; + /////////////////////////////////////////////////////////////////////////////// /// Conventions @@ -49,7 +52,7 @@ class Device; /////////////////////////////////////////////////////////////////////////////// class NetDefAdapter { public: - NetDefAdapter(const OpRegistryBase *op_registry, + NetDefAdapter(const OpRegistry *op_registry, const Workspace *ws); // Adapt original net_def to a better net. // 1. Adapt device: choose best device for every op in the net. @@ -122,7 +125,7 @@ class NetDefAdapter { std::string DebugString(const NetDef *net_def); private: - const OpRegistryBase *op_registry_; + const OpRegistry *op_registry_; const Workspace *ws_; NetOptimizer net_optimizer_; }; diff --git a/mace/core/operator.cc b/mace/core/operator.cc deleted file mode 100644 index a266ce2b..00000000 --- a/mace/core/operator.cc +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -#include "mace/core/operator.h" - -namespace mace { -OpConditionContext::OpConditionContext( - const Workspace *ws, - OpConditionContext::TensorShapeMap *info) - : operator_def_(nullptr), - ws_(ws), - device_(nullptr), - tensor_shape_info_(info) {} - -void OpConditionContext::set_operator_def( - const OperatorDef *operator_def) { - operator_def_ = operator_def; - input_data_types_.clear(); -} - -void OpConditionContext::SetInputInfo(size_t idx, - MemoryType mem_type, - DataType dt) { - if (input_mem_types_.empty()) { - // the default inputs' memory types are same as output memory type. - input_mem_types_.resize(operator_def_->input_size(), output_mem_type_); - } - if (input_data_types_.empty()) { - // the default inputs' data types are same as operation's data type. - DataType op_dt = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DataType::DT_FLOAT))); - input_data_types_.resize(operator_def_->input_size(), op_dt); - } - MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size()); - input_mem_types_[idx] = mem_type; - input_data_types_[idx] = dt; -} - -void OpConditionContext::set_output_mem_type(MemoryType type) { - MACE_CHECK(operator_def_ != nullptr); - output_mem_type_ = type; - input_mem_types_.clear(); -} - -MemoryType OpConditionContext::GetInputMemType(size_t idx) const { - if (input_mem_types_.empty()) { - return output_mem_type_; - } - MACE_CHECK(idx < input_mem_types_.size(), - idx, " < ", input_mem_types_.size()); - return input_mem_types_[idx]; -} - -DataType OpConditionContext::GetInputDataType(size_t idx) const { - if (input_data_types_.empty()) { - // the default inputs' data types are same as operation's data type. - return static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DataType::DT_FLOAT))); - } - MACE_CHECK(idx < input_data_types_.size()); - return input_data_types_[idx]; -} - -#ifdef MACE_ENABLE_OPENCL -void OpConditionContext::SetInputOpenCLBufferType( - size_t idx, OpenCLBufferType buffer_type) { - if (input_opencl_buffer_types_.empty()) { - // the default inputs' memory types are same as output memory type. 
- input_opencl_buffer_types_.resize(operator_def_->input_size(), - OpenCLBufferType::IN_OUT_CHANNEL); - } - MACE_CHECK(idx < input_opencl_buffer_types_.size()); - input_opencl_buffer_types_[idx] = buffer_type; -} -OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType( - size_t idx) const { - if (input_opencl_buffer_types_.empty()) { - return OpenCLBufferType::IN_OUT_CHANNEL; - } - MACE_CHECK(idx < input_opencl_buffer_types_.size()); - return input_opencl_buffer_types_[idx]; -} -#endif // MACE_ENABLE_OPENCL - -OpConstructContext::OpConstructContext(Workspace *ws) - : operator_def_(nullptr), - ws_(ws), - device_(nullptr) {} - -void OpConstructContext::set_operator_def( - std::shared_ptr operator_def) { - operator_def_ = operator_def; -} - -OpInitContext::OpInitContext(Workspace *ws, Device *device) - : ws_(ws), device_(device) {} - -Operation::Operation(OpConstructContext *context) - : operator_def_(context->operator_def()) {} - -MaceStatus Operation::Init(OpInitContext *context) { - Workspace *ws = context->workspace(); - for (const std::string &input_str : operator_def_->input()) { - const Tensor *tensor = ws->GetTensor(input_str); - MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(), - ": Encountered a non-existing input tensor: ", input_str); - inputs_.push_back(tensor); - } - for (int i = 0; i < operator_def_->output_size(); ++i) { - const std::string output_str = operator_def_->output(i); - if (ws->HasTensor(output_str)) { - outputs_.push_back(ws->GetTensor(output_str)); - } else { - MACE_CHECK( - operator_def_->output_type_size() == 0 || - operator_def_->output_size() == operator_def_->output_type_size(), - "operator output size != operator output type size", - operator_def_->output_size(), - operator_def_->output_type_size()); - DataType output_type; - if (i < operator_def_->output_type_size()) { - output_type = operator_def_->output_type(i); - } else { - output_type = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DT_FLOAT))); - } - outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( - output_str, context->device()->allocator(), output_type))); - } - if (i < operator_def_->output_shape_size()) { - std::vector - shape_configured(operator_def_->output_shape(i).dims_size()); - for (size_t dim = 0; dim < shape_configured.size(); ++dim) { - shape_configured[dim] = operator_def_->output_shape(i).dims(dim); - } - ws->GetTensor(output_str)->SetShapeConfigured(shape_configured); - } - } - return MaceStatus::MACE_SUCCESS; -} - -// op registry -namespace { -class OpKeyBuilder { - public: - explicit OpKeyBuilder(const std::string &op_name); - - OpKeyBuilder &Device(DeviceType device); - - OpKeyBuilder &TypeConstraint(const char *attr_name, - DataType allowed); - - const std::string Build(); - - private: - std::string op_name_; - DeviceType device_type_; - std::map type_constraint_; -}; - -OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {} - -OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { - device_type_ = device; - return *this; -} - -OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, - DataType allowed) { - type_constraint_[attr_name] = allowed; - return *this; -} - -const std::string OpKeyBuilder::Build() { - static const std::vector type_order = {"T"}; - std::stringstream ss; - ss << op_name_; - ss << device_type_; - for (auto type : type_order) { - ss << type << "_" << DataTypeToString(type_constraint_[type]); - } - - return ss.str(); -} -} // namespace - 
-OpRegistrationInfo::OpRegistrationInfo() { - // default device type placer - device_placer = [this](OpConditionContext *context) -> std::set { - MACE_UNUSED(context); - return this->devices; - }; - - // default input and output memory type setter - memory_type_setter = [](OpConditionContext *context) -> void { - if (context->device()->device_type() == DeviceType::GPU) { -#ifdef MACE_ENABLE_OPENCL - if (context->device()->gpu_runtime()->UseImageMemory()) { - context->set_output_mem_type(MemoryType::GPU_IMAGE); - } else { - context->set_output_mem_type(MemoryType::GPU_BUFFER); - } -#endif // MACE_ENABLE_OPENCL - } else { - context->set_output_mem_type(MemoryType::CPU_BUFFER); - } - }; - - data_format_selector = [](OpConditionContext *context) - -> std::vector { - DataFormat op_data_format = - static_cast( - ProtoArgHelper::GetOptionalArg( - *context->operator_def(), "data_format", - static_cast(DataFormat::NONE))); - return std::vector(context->operator_def()->input_size(), - op_data_format); - }; -} - -void OpRegistrationInfo::AddDevice(DeviceType device) { - devices.insert(device); -} - -void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) { - VLOG(3) << "Registering: " << key; - MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key); - creators[key] = creator; -} - -MaceStatus OpRegistryBase::Register( - const std::string &op_type, - const DeviceType device_type, - const DataType dt, - OpRegistrationInfo::OpCreator creator) { - if (registry_.count(op_type) == 0) { - registry_[op_type] = std::unique_ptr( - new OpRegistrationInfo); - } - registry_[op_type]->AddDevice(device_type); - - std::string op_key = OpKeyBuilder(op_type) - .Device(device_type) - .TypeConstraint("T", dt) - .Build(); - registry_.at(op_type)->Register(op_key, creator); - return MaceStatus::MACE_SUCCESS; -} - -MaceStatus OpRegistryBase::Register( - const OpConditionBuilder &builder) { - std::string op_type = builder.type(); - if (registry_.count(op_type) == 0) { - registry_[op_type] = std::unique_ptr( - new OpRegistrationInfo); - } - builder.Finalize(registry_[op_type].get()); - return MaceStatus::MACE_SUCCESS; -} - -const std::set OpRegistryBase::AvailableDevices( - const std::string &op_type, OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - - return registry_.at(op_type)->device_placer(context); -} - -void OpRegistryBase::GetInOutMemoryTypes( - const std::string &op_type, - OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered. 
op_type=", op_type); - return registry_.at(op_type)->memory_type_setter(context); -} - -const std::vector OpRegistryBase::InputsDataFormat( - const std::string &op_type, - OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - return registry_.at(op_type)->data_format_selector(context); -} - -std::unique_ptr OpRegistryBase::CreateOperation( - OpConstructContext *context, - DeviceType device_type) const { - auto operator_def = context->operator_def(); - DataType dtype = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def, "T", static_cast(DT_FLOAT))); - VLOG(1) << "Creating operator " << operator_def->name() << "(" - << operator_def->type() << "<" << dtype << ">" << ") on " - << device_type; - const std::string op_type = context->operator_def()->type(); - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - - auto key_dtype = - (device_type == DeviceType::GPU && dtype == DT_HALF) ? DT_FLOAT : dtype; - std::string key = OpKeyBuilder(op_type) - .Device(device_type) - .TypeConstraint("T", key_dtype) - .Build(); - if (registry_.at(op_type)->creators.count(key) == 0) { - LOG(FATAL) << "Key not registered: " << key - << ", op type is: " << operator_def->type(); - } - return registry_.at(op_type)->creators.at(key)(context); -} - -OpConditionBuilder::OpConditionBuilder(const std::string &type) - : type_(type) {} - -const std::string OpConditionBuilder::type() const { - return type_; -} - -OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc( - OpRegistrationInfo::DevicePlacer placer) { - placer_ = placer; - return *this; -} - -OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter( - OpRegistrationInfo::MemoryTypeSetter setter) { - memory_type_setter_ = setter; - return *this; -} - -OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector( - OpRegistrationInfo::DataFormatSelector selector) { - data_format_selector_ = selector; - return *this; -} - -void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const { - if (info != nullptr) { - if (placer_) { - info->device_placer = placer_; - } - if (memory_type_setter_) { - info->memory_type_setter = memory_type_setter_; - } - - if (data_format_selector_) { - info->data_format_selector = data_format_selector_; - } - } -} - -} // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h deleted file mode 100644 index fbcbfd2e..00000000 --- a/mace/core/operator.h +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_CORE_OPERATOR_H_ -#define MACE_CORE_OPERATOR_H_ - -#include -#include -#include -#include -#include - -#include "mace/core/arg_helper.h" -#include "mace/core/op_context.h" -#include "mace/core/tensor.h" -#include "mace/core/workspace.h" -#include "mace/proto/mace.pb.h" -#ifdef MACE_ENABLE_OPENCL -#include "mace/core/runtime/opencl/opencl_util.h" -#endif // MACE_ENABLE_OPENCL - -namespace mace { - -// OpConditionContext has all information used for choosing proper Op -class OpConditionContext { - public: - typedef std::unordered_map> TensorShapeMap; - OpConditionContext(const Workspace *ws, TensorShapeMap *info); - ~OpConditionContext() = default; - - void set_operator_def(const OperatorDef *operator_def); - - inline const OperatorDef *operator_def() const { - return operator_def_; - } - - inline const Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } - - inline TensorShapeMap *tensor_shape_info() const { - return tensor_shape_info_; - } - - void set_output_mem_type(MemoryType type); - - inline MemoryType output_mem_type() const { - return output_mem_type_; - } - - void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt); - - MemoryType GetInputMemType(size_t idx) const; - - DataType GetInputDataType(size_t idx) const; - -#ifdef MACE_ENABLE_OPENCL - void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type); - OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const; -#endif // MACE_ENABLE_OPENCL - - private: - const OperatorDef *operator_def_; - const Workspace *ws_; - Device *device_; - TensorShapeMap *tensor_shape_info_; - // used for memory transform - std::vector input_mem_types_; - std::vector input_data_types_; - MemoryType output_mem_type_; // there is only one output memory type now. -#ifdef MACE_ENABLE_OPENCL - std::vector input_opencl_buffer_types_; -#endif // MACE_ENABLE_OPENCL -}; - -// memory_optimizer, device -class OpConstructContext { - typedef std::unordered_map> TensorShapeMap; - - public: - explicit OpConstructContext(Workspace *ws); - ~OpConstructContext() = default; - - void set_operator_def(std::shared_ptr operator_def); - - inline std::shared_ptr operator_def() const { - return operator_def_; - } - - inline Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } -#ifdef MACE_ENABLE_OPENCL - inline MemoryType GetOpMemoryType() const { - return static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, OutputMemoryTypeTagName(), - static_cast(MemoryType::CPU_BUFFER))); - } -#endif // MACE_ENABLE_OPENCL - - private: - std::shared_ptr operator_def_; - Workspace *ws_; - Device *device_; -}; - -// memory_optimizer, device -class OpInitContext { - public: - explicit OpInitContext(Workspace *ws, Device *device = nullptr); - ~OpInitContext() = default; - - inline Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } - - private: - Workspace *ws_; - Device *device_; -}; - -// Conventions -// * If there exist format, NHWC is the default format -// * The input/output format of CPU ops with float data type is NCHW -// * The input/output format of GPU ops and CPU Quantization ops is NHWC -// * Inputs' data type is same as the operation data type by default. 
-// * The outputs' data type is same as the operation data type by default. -class Operation { - public: - explicit Operation(OpConstructContext *context); - virtual ~Operation() = default; - - template - inline T GetOptionalArg(const std::string &name, - const T &default_value) const { - MACE_CHECK(operator_def_, "operator_def was null!"); - return ProtoArgHelper::GetOptionalArg( - *operator_def_, name, default_value); - } - template - inline std::vector GetRepeatedArgs( - const std::string &name, const std::vector &default_value = {}) const { - MACE_CHECK(operator_def_, "operator_def was null!"); - return ProtoArgHelper::GetRepeatedArgs( - *operator_def_, name, default_value); - } - - inline DeviceType device_type() const { - return static_cast(operator_def_->device_type()); - } - - inline const Tensor *Input(unsigned int idx) { - MACE_CHECK(idx < inputs_.size()); - return inputs_[idx]; - } - - inline Tensor *Output(int idx) { return outputs_[idx]; } - - inline int InputSize() { return inputs_.size(); } - inline int OutputSize() { return outputs_.size(); } - inline const std::vector &Inputs() const { return inputs_; } - inline const std::vector &Outputs() { return outputs_; } - - // Run Op asynchronously (depends on device), return a future if not nullptr. - virtual MaceStatus Init(OpInitContext *); - virtual MaceStatus Run(OpContext *) = 0; - - inline const OperatorDef &debug_def() const { - MACE_CHECK(has_debug_def(), "operator_def was null!"); - return *operator_def_; - } - - inline void set_debug_def( - const std::shared_ptr &operator_def) { - operator_def_ = operator_def; - } - - inline bool has_debug_def() const { return operator_def_ != nullptr; } - - inline std::shared_ptr operator_def() { - return operator_def_; - } - - protected: - std::shared_ptr operator_def_; - std::vector inputs_; - std::vector outputs_; - - MACE_DISABLE_COPY_AND_ASSIGN(Operation); -}; - -// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the -// indices of the operator's inputs and outputs, in order to avoid confusion. -// For example, for a fully convolution layer that has input, weight and bias, -// you can define its input tags as: -// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS); -// And in the code, instead of doing -// auto& weight = Input(1); -// you can now do -// auto& weight = Input(WEIGHT); -// to make it more clear. -#define MACE_OP_INPUT_TAGS(first_input, ...) \ - enum _InputTags { first_input = 0, __VA_ARGS__ } -#define MACE_OP_OUTPUT_TAGS(first_input, ...) 
\ - enum _OutputTags { first_input = 0, __VA_ARGS__ } - -struct OpRegistrationInfo { - public: - typedef std::function(OpConstructContext *)> - OpCreator; - typedef std::function(OpConditionContext *)> - DevicePlacer; - typedef std::function MemoryTypeSetter; - typedef std::function(OpConditionContext *)> - DataFormatSelector; - - OpRegistrationInfo(); - - void AddDevice(DeviceType); - - void Register(const std::string &key, OpCreator creator); - - std::set devices; - std::unordered_map creators; - DevicePlacer device_placer; - MemoryTypeSetter memory_type_setter; - DataFormatSelector data_format_selector; -}; - -class OpConditionBuilder { - public: - explicit OpConditionBuilder(const std::string &type); - - const std::string type() const; - - OpConditionBuilder &SetDevicePlacerFunc( - OpRegistrationInfo::DevicePlacer placer); - - // If you set input memory type for specified Op, - // you must call OpConditionContext::set_output_mem_type - OpConditionBuilder &SetInputMemoryTypeSetter( - OpRegistrationInfo::MemoryTypeSetter setter); - - OpConditionBuilder &SetInputsDataFormatSelector( - OpRegistrationInfo::DataFormatSelector selector); - - void Finalize(OpRegistrationInfo *info) const; - - private: - std::string type_; - OpRegistrationInfo::DevicePlacer placer_; - OpRegistrationInfo::MemoryTypeSetter memory_type_setter_; - OpRegistrationInfo::DataFormatSelector data_format_selector_; -}; - -class OpRegistryBase { - public: - OpRegistryBase() = default; - virtual ~OpRegistryBase() = default; - MaceStatus Register(const std::string &op_type, - const DeviceType device_type, - const DataType dt, - OpRegistrationInfo::OpCreator creator); - - MaceStatus Register(const OpConditionBuilder &builder); - - const std::set AvailableDevices( - const std::string &op_type, OpConditionContext *context) const; - - void GetInOutMemoryTypes( - const std::string &op_type, OpConditionContext *context) const; - - const std::vector InputsDataFormat( - const std::string &op_type, OpConditionContext *context) const; - - std::unique_ptr CreateOperation( - OpConstructContext *context, - DeviceType device_type) const; - - template - static std::unique_ptr DefaultCreator( - OpConstructContext *context) { - return std::unique_ptr(new DerivedType(context)); - } - - private: - std::unordered_map< - std::string, - std::unique_ptr> registry_; - MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase); -}; - -#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ - op_registry->Register(op_type, \ - device, \ - DataTypeToEnum
::value, \ - OpRegistryBase::DefaultCreator>) - -#define MACE_REGISTER_OP_BY_CLASS( \ - op_registry, op_type, class_name, device, dt) \ - op_registry->Register(op_type, \ - device, \ - DataTypeToEnum
::value, \ - OpRegistryBase::DefaultCreator) - -#ifdef MACE_ENABLE_OPENCL -#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \ - op_registry->Register( \ - op_type, \ - DeviceType::GPU, \ - DT_FLOAT, \ - OpRegistryBase::DefaultCreator>) -#else -#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) -#endif - -#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \ - op_registry->Register(builder) - -} // namespace mace - -#endif // MACE_CORE_OPERATOR_H_ diff --git a/mace/core/ops/op_condition_builder.cc b/mace/core/ops/op_condition_builder.cc new file mode 100644 index 00000000..4f226e36 --- /dev/null +++ b/mace/core/ops/op_condition_builder.cc @@ -0,0 +1,59 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_condition_builder.h" + +namespace mace { + +OpConditionBuilder::OpConditionBuilder(const std::string &type) + : type_(type) {} + +const std::string OpConditionBuilder::type() const { + return type_; +} + +OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc( + OpRegistrationInfo::DevicePlacer placer) { + placer_ = placer; + return *this; +} + +OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter( + OpRegistrationInfo::MemoryTypeSetter setter) { + memory_type_setter_ = setter; + return *this; +} + +OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector( + OpRegistrationInfo::DataFormatSelector selector) { + data_format_selector_ = selector; + return *this; +} + +void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const { + if (info != nullptr) { + if (placer_) { + info->device_placer = placer_; + } + if (memory_type_setter_) { + info->memory_type_setter = memory_type_setter_; + } + + if (data_format_selector_) { + info->data_format_selector = data_format_selector_; + } + } +} + +} // namespace mace diff --git a/mace/core/ops/op_condition_builder.h b/mace/core/ops/op_condition_builder.h new file mode 100644 index 00000000..1378e00e --- /dev/null +++ b/mace/core/ops/op_condition_builder.h @@ -0,0 +1,53 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ +#define MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ + +#include +#include + +#include "mace/core/registry/op_registration_info.h" +#include "mace/core/types.h" + +namespace mace { +class OpConditionBuilder { + public: + explicit OpConditionBuilder(const std::string &type); + + const std::string type() const; + + OpConditionBuilder &SetDevicePlacerFunc( + OpRegistrationInfo::DevicePlacer placer); + + // If you set input memory type for specified Op, + // you must call OpConditionContext::set_output_mem_type + OpConditionBuilder &SetInputMemoryTypeSetter( + OpRegistrationInfo::MemoryTypeSetter setter); + + OpConditionBuilder &SetInputsDataFormatSelector( + OpRegistrationInfo::DataFormatSelector selector); + + void Finalize(OpRegistrationInfo *info) const; + + private: + std::string type_; + OpRegistrationInfo::DevicePlacer placer_; + OpRegistrationInfo::MemoryTypeSetter memory_type_setter_; + OpRegistrationInfo::DataFormatSelector data_format_selector_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ diff --git a/mace/core/ops/op_condition_context.cc b/mace/core/ops/op_condition_context.cc new file mode 100644 index 00000000..eb094a8c --- /dev/null +++ b/mace/core/ops/op_condition_context.cc @@ -0,0 +1,104 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_condition_context.h" + +#include "mace/core/arg_helper.h" +#include "mace/proto/mace.pb.h" +#include "mace/utils/logging.h" + +namespace mace { + +OpConditionContext::OpConditionContext( + const Workspace *ws, + OpConditionContext::TensorShapeMap *info) + : operator_def_(nullptr), + ws_(ws), + device_(nullptr), + tensor_shape_info_(info) {} + +void OpConditionContext::set_operator_def( + const OperatorDef *operator_def) { + operator_def_ = operator_def; + input_data_types_.clear(); +} + +void OpConditionContext::SetInputInfo(size_t idx, + MemoryType mem_type, + DataType dt) { + if (input_mem_types_.empty()) { + // the default inputs' memory types are same as output memory type. + input_mem_types_.resize(operator_def_->input_size(), output_mem_type_); + } + if (input_data_types_.empty()) { + // the default inputs' data types are same as operation's data type. 
+ DataType op_dt = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DataType::DT_FLOAT))); + input_data_types_.resize(operator_def_->input_size(), op_dt); + } + MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size()); + input_mem_types_[idx] = mem_type; + input_data_types_[idx] = dt; +} + +void OpConditionContext::set_output_mem_type(MemoryType type) { + MACE_CHECK(operator_def_ != nullptr); + output_mem_type_ = type; + input_mem_types_.clear(); +} + +MemoryType OpConditionContext::GetInputMemType(size_t idx) const { + if (input_mem_types_.empty()) { + return output_mem_type_; + } + MACE_CHECK(idx < input_mem_types_.size(), + idx, " < ", input_mem_types_.size()); + return input_mem_types_[idx]; +} + +DataType OpConditionContext::GetInputDataType(size_t idx) const { + if (input_data_types_.empty()) { + // the default inputs' data types are same as operation's data type. + return static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DataType::DT_FLOAT))); + } + MACE_CHECK(idx < input_data_types_.size()); + return input_data_types_[idx]; +} + +#ifdef MACE_ENABLE_OPENCL +void OpConditionContext::SetInputOpenCLBufferType( + size_t idx, OpenCLBufferType buffer_type) { + if (input_opencl_buffer_types_.empty()) { + // the default inputs' memory types are same as output memory type. + input_opencl_buffer_types_.resize(operator_def_->input_size(), + OpenCLBufferType::IN_OUT_CHANNEL); + } + MACE_CHECK(idx < input_opencl_buffer_types_.size()); + input_opencl_buffer_types_[idx] = buffer_type; +} + +OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType( + size_t idx) const { + if (input_opencl_buffer_types_.empty()) { + return OpenCLBufferType::IN_OUT_CHANNEL; + } + MACE_CHECK(idx < input_opencl_buffer_types_.size()); + return input_opencl_buffer_types_[idx]; +} +#endif // MACE_ENABLE_OPENCL + +} // namespace mace diff --git a/mace/core/ops/op_condition_context.h b/mace/core/ops/op_condition_context.h new file mode 100644 index 00000000..8e1c882e --- /dev/null +++ b/mace/core/ops/op_condition_context.h @@ -0,0 +1,94 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ + +#include +#include +#include +#include + +#include "mace/core/types.h" + +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_util.h" +#endif // MACE_ENABLE_OPENCL + +namespace mace { +class Workspace; +class Device; + +// OpConditionContext has all information used for choosing proper Op +class OpConditionContext { + public: + typedef std::unordered_map> TensorShapeMap; + OpConditionContext(const Workspace *ws, TensorShapeMap *info); + ~OpConditionContext() = default; + + void set_operator_def(const OperatorDef *operator_def); + + const OperatorDef *operator_def() const { + return operator_def_; + } + + const Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } + + TensorShapeMap *tensor_shape_info() const { + return tensor_shape_info_; + } + + void set_output_mem_type(MemoryType type); + + MemoryType output_mem_type() const { + return output_mem_type_; + } + + void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt); + + MemoryType GetInputMemType(size_t idx) const; + + DataType GetInputDataType(size_t idx) const; + +#ifdef MACE_ENABLE_OPENCL + void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type); + OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const; +#endif // MACE_ENABLE_OPENCL + + private: + const OperatorDef *operator_def_; + const Workspace *ws_; + Device *device_; + TensorShapeMap *tensor_shape_info_; + // used for memory transform + std::vector input_mem_types_; + std::vector input_data_types_; + MemoryType output_mem_type_; // there is only one output memory type now. +#ifdef MACE_ENABLE_OPENCL + std::vector input_opencl_buffer_types_; +#endif // MACE_ENABLE_OPENCL +}; +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ diff --git a/mace/core/ops/op_construct_context.cc b/mace/core/ops/op_construct_context.cc new file mode 100644 index 00000000..fc701259 --- /dev/null +++ b/mace/core/ops/op_construct_context.cc @@ -0,0 +1,29 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_construct_context.h" + +namespace mace { + +OpConstructContext::OpConstructContext(Workspace *ws) + : operator_def_(nullptr), + ws_(ws), + device_(nullptr) {} + +void OpConstructContext::set_operator_def( + std::shared_ptr operator_def) { + operator_def_ = operator_def; +} + +} // namespace mace diff --git a/mace/core/ops/op_construct_context.h b/mace/core/ops/op_construct_context.h new file mode 100644 index 00000000..9bd4709d --- /dev/null +++ b/mace/core/ops/op_construct_context.h @@ -0,0 +1,73 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ + +#include +#include +#include +#include + +#include "mace/core/arg_helper.h" +#include "mace/core/types.h" +#include "mace/proto/mace.pb.h" + +namespace mace { +class Device; +class Workspace; + +// memory_optimizer, device +class OpConstructContext { + typedef std::unordered_map> TensorShapeMap; + + public: + explicit OpConstructContext(Workspace *ws); + ~OpConstructContext() = default; + + void set_operator_def(std::shared_ptr operator_def); + + std::shared_ptr operator_def() const { + return operator_def_; + } + + Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } +#ifdef MACE_ENABLE_OPENCL + inline MemoryType GetOpMemoryType() const { + return static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, OutputMemoryTypeTagName(), + static_cast(MemoryType::CPU_BUFFER))); + } +#endif // MACE_ENABLE_OPENCL + + private: + std::shared_ptr operator_def_; + Workspace *ws_; + Device *device_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ diff --git a/mace/core/op_context.cc b/mace/core/ops/op_context.cc similarity index 96% rename from mace/core/op_context.cc rename to mace/core/ops/op_context.cc index d0ebeff7..64160995 100644 --- a/mace/core/op_context.cc +++ b/mace/core/ops/op_context.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" namespace mace { diff --git a/mace/core/op_context.h b/mace/core/ops/op_context.h similarity index 90% rename from mace/core/op_context.h rename to mace/core/ops/op_context.h index 26a31dc3..06225479 100644 --- a/mace/core/op_context.h +++ b/mace/core/ops/op_context.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_CORE_OP_CONTEXT_H_ -#define MACE_CORE_OP_CONTEXT_H_ +#ifndef MACE_CORE_OPS_OP_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONTEXT_H_ #include "mace/core/device.h" #include "mace/core/workspace.h" @@ -35,8 +35,7 @@ class OpContext { Device *device_; Workspace *ws_; StatsFuture *future_; - // metadata }; } // namespace mace -#endif // MACE_CORE_OP_CONTEXT_H_ +#endif // MACE_CORE_OPS_OP_CONTEXT_H_ diff --git a/mace/core/ops/op_delegator.h b/mace/core/ops/op_delegator.h new file mode 100644 index 00000000..029bd39f --- /dev/null +++ b/mace/core/ops/op_delegator.h @@ -0,0 +1,58 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OP_DELEGATOR_H_ +#define MACE_CORE_OPS_OP_DELEGATOR_H_ + +#include + +#include "mace/utils/macros.h" +#include "mace/utils/memory.h" + +namespace mace { + +enum ImplType { + REF = 0, + NEON, +}; + +#ifdef MACE_ENABLE_NEON +#define MACE_CPU_IMPL_TYPE NEON +#else +#define MACE_CPU_IMPL_TYPE REF +#endif + +struct DelegatorParam { + public: + DelegatorParam() = default; + virtual ~DelegatorParam() = default; +}; + +class OpDelegator { + public: + explicit OpDelegator(const DelegatorParam ¶m) { + MACE_UNUSED(param); + } + virtual ~OpDelegator() = default; + + template + static std::unique_ptr DefaultCreator( + const DelegatorParam ¶m) { + return make_unique(static_cast(param)); + } +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_DELEGATOR_H_ diff --git a/mace/core/ops/op_init_context.cc b/mace/core/ops/op_init_context.cc new file mode 100644 index 00000000..2b3cee25 --- /dev/null +++ b/mace/core/ops/op_init_context.cc @@ -0,0 +1,22 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_init_context.h" + +namespace mace { + +OpInitContext::OpInitContext(Workspace *ws, Device *device) + : ws_(ws), device_(device) {} + +} // namespace mace diff --git a/mace/core/ops/op_init_context.h b/mace/core/ops/op_init_context.h new file mode 100644 index 00000000..da51cc23 --- /dev/null +++ b/mace/core/ops/op_init_context.h @@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_INIT_CONTEXT_H_ +#define MACE_CORE_OPS_OP_INIT_CONTEXT_H_ + +namespace mace { +class Workspace; +class Device; + +// memory_optimizer, device +class OpInitContext { + public: + explicit OpInitContext(Workspace *ws, Device *device = nullptr); + ~OpInitContext() = default; + + Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } + + private: + Workspace *ws_; + Device *device_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_INIT_CONTEXT_H_ diff --git a/mace/core/ops/operator.cc b/mace/core/ops/operator.cc new file mode 100644 index 00000000..5aa102d9 --- /dev/null +++ b/mace/core/ops/operator.cc @@ -0,0 +1,68 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/operator.h" + +#include + +#include "mace/core/ops/op_construct_context.h" +#include "mace/core/ops/op_init_context.h" + +namespace mace { +Operation::Operation(OpConstructContext *context) + : operator_def_(context->operator_def()) {} + +MaceStatus Operation::Init(OpInitContext *context) { + Workspace *ws = context->workspace(); + for (const std::string &input_str : operator_def_->input()) { + const Tensor *tensor = ws->GetTensor(input_str); + MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(), + ": Encountered a non-existing input tensor: ", input_str); + inputs_.push_back(tensor); + } + for (int i = 0; i < operator_def_->output_size(); ++i) { + const std::string output_str = operator_def_->output(i); + if (ws->HasTensor(output_str)) { + outputs_.push_back(ws->GetTensor(output_str)); + } else { + MACE_CHECK( + operator_def_->output_type_size() == 0 || + operator_def_->output_size() == operator_def_->output_type_size(), + "operator output size != operator output type size", + operator_def_->output_size(), + operator_def_->output_type_size()); + DataType output_type; + if (i < operator_def_->output_type_size()) { + output_type = operator_def_->output_type(i); + } else { + output_type = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DT_FLOAT))); + } + outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( + output_str, context->device()->allocator(), output_type))); + } + if (i < operator_def_->output_shape_size()) { + std::vector + shape_configured(operator_def_->output_shape(i).dims_size()); + for (size_t dim = 0; dim < shape_configured.size(); ++dim) { + shape_configured[dim] = operator_def_->output_shape(i).dims(dim); + } + ws->GetTensor(output_str)->SetShapeConfigured(shape_configured); + } + } + return MaceStatus::MACE_SUCCESS; +} + +} // namespace mace diff --git a/mace/core/ops/operator.h b/mace/core/ops/operator.h new file mode 100644 index 00000000..bb4a20d5 --- /dev/null +++ b/mace/core/ops/operator.h @@ -0,0 +1,120 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OPERATOR_H_ +#define MACE_CORE_OPS_OPERATOR_H_ + +#include +#include +#include + +#include "mace/core/arg_helper.h" +#include "mace/core/ops/op_construct_context.h" +#include "mace/core/ops/op_context.h" +#include "mace/core/tensor.h" +#include "mace/proto/mace.pb.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_util.h" +#endif // MACE_ENABLE_OPENCL + +namespace mace { +class OpInitContext; +// Conventions +// * If there exist format, NHWC is the default format +// * The input/output format of CPU ops with float data type is NCHW +// * The input/output format of GPU ops and CPU Quantization ops is NHWC +// * Inputs' data type is same as the operation data type by default. +// * The outputs' data type is same as the operation data type by default. +class Operation { + public: + explicit Operation(OpConstructContext *context); + virtual ~Operation() = default; + + template + T GetOptionalArg(const std::string &name, + const T &default_value) const { + MACE_CHECK(operator_def_, "operator_def was null!"); + return ProtoArgHelper::GetOptionalArg( + *operator_def_, name, default_value); + } + template + std::vector GetRepeatedArgs( + const std::string &name, const std::vector &default_value = {}) const { + MACE_CHECK(operator_def_, "operator_def was null!"); + return ProtoArgHelper::GetRepeatedArgs( + *operator_def_, name, default_value); + } + + DeviceType device_type() const { + return static_cast(operator_def_->device_type()); + } + + const Tensor *Input(unsigned int idx) { + MACE_CHECK(idx < inputs_.size()); + return inputs_[idx]; + } + + Tensor *Output(int idx) { return outputs_[idx]; } + + int InputSize() { return inputs_.size(); } + int OutputSize() { return outputs_.size(); } + const std::vector &Inputs() const { return inputs_; } + const std::vector &Outputs() { return outputs_; } + + // Run Op asynchronously (depends on device), return a future if not nullptr. + virtual MaceStatus Init(OpInitContext *); + virtual MaceStatus Run(OpContext *) = 0; + + const OperatorDef &debug_def() const { + MACE_CHECK(has_debug_def(), "operator_def was null!"); + return *operator_def_; + } + + void set_debug_def( + const std::shared_ptr &operator_def) { + operator_def_ = operator_def; + } + + bool has_debug_def() const { return operator_def_ != nullptr; } + + inline std::shared_ptr operator_def() { + return operator_def_; + } + + protected: + std::shared_ptr operator_def_; + std::vector inputs_; + std::vector outputs_; + + MACE_DISABLE_COPY_AND_ASSIGN(Operation); +}; + +// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the +// indices of the operator's inputs and outputs, in order to avoid confusion. 
+// For example, for a convolution layer that has input, weight, and bias,
+// you can define its input tags as:
+// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
+// And in the code, instead of doing
+// auto& weight = Input(1);
+// you can now do
+// auto& weight = Input(WEIGHT);
+// to make it clearer.
+#define MACE_OP_INPUT_TAGS(first_input, ...) \
+  enum _InputTags { first_input = 0, __VA_ARGS__ }
+#define MACE_OP_OUTPUT_TAGS(first_input, ...) \
+  enum _OutputTags { first_input = 0, __VA_ARGS__ }
+
+}  // namespace mace
+
+#endif  // MACE_CORE_OPS_OPERATOR_H_
diff --git a/mace/core/registry/op_delegator_registry.cc b/mace/core/registry/op_delegator_registry.cc
new file mode 100644
index 00000000..006f5555
--- /dev/null
+++ b/mace/core/registry/op_delegator_registry.cc
@@ -0,0 +1,39 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mace/core/registry/op_delegator_registry.h"
+
+#include <utility>
+
+#include "mace/utils/logging.h"
+
+namespace mace {
+
+MaceStatus OpDelegatorRegistry::Register(const std::string &key,
+                                         DelegatorCreator creator) {
+  MACE_CHECK(registry_.count(key) == 0, "Registering an existing key: ", key);
+  registry_[key] = std::move(creator);
+  return MaceStatus::MACE_SUCCESS;
+}
+
+DelegatorCreator OpDelegatorRegistry::GetCreator(const std::string &key) const {
+  MACE_CHECK(registry_.count(key) > 0, key, " does not exist.");
+  return registry_.at(key);
+}
+
+template<> const char *DType<float>::name_ = "float";
+template<> const char *DType<int>::name_ = "int";
+template<> const char *DType<uint8_t>::name_ = "uint8_t";
+
+}  // namespace mace
diff --git a/mace/core/registry/op_delegator_registry.h b/mace/core/registry/op_delegator_registry.h
new file mode 100644
index 00000000..f70d5555
--- /dev/null
+++ b/mace/core/registry/op_delegator_registry.h
@@ -0,0 +1,94 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
+#define MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "mace/core/ops/op_delegator.h"
+#include "mace/proto/mace.pb.h"
+#include "mace/public/mace.h"
+
+namespace mace {
+typedef std::function<std::unique_ptr<OpDelegator>(const DelegatorParam &)>
+    DelegatorCreator;
+
+class OpDelegatorRegistry {
+ public:
+  OpDelegatorRegistry() = default;
+  ~OpDelegatorRegistry() = default;
+
+  MaceStatus Register(const std::string &key, DelegatorCreator creator);
+  DelegatorCreator GetCreator(const std::string &key) const;
+
+ private:
+  std::unordered_map<std::string, DelegatorCreator> registry_;
+};
+
+template<typename T>
+struct DType { static const char *name_; };
+template<> const char *DType<float>::name_;
+template<> const char *DType<int>::name_;
+template<> const char *DType<uint8_t>::name_;
+
+
+}  // namespace mace
+
+#ifndef MACE_DELEGATOR_KEY_TMP
+#define MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl) \
+    (std::string(#delegator_name"_"#device"_"#impl"_") + DType<DT>::name_)
+#endif  // MACE_DELEGATOR_KEY_TMP
+
+#ifndef MACE_DELEGATOR_KEY
+#define MACE_DELEGATOR_KEY(delegator_name, device, DT, impl) \
+    MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl)
+#endif  // MACE_DELEGATOR_KEY
+
+#ifndef MACE_DELEGATOR_KEY_EX_TMP
+#define MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag) \
+    (std::string(#delegator_name"_"#device"_"#impl"_"#tag"_") + DType<DT>::name_)
+#endif  // MACE_DELEGATOR_KEY_EX_TMP
+
+#ifndef MACE_DELEGATOR_KEY_EX
+#define MACE_DELEGATOR_KEY_EX(delegator_name, device, DT, impl, tag) \
+    MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag)
+#endif  // MACE_DELEGATOR_KEY_EX
+
+#ifndef MACE_REGISTER_DELEGATOR
+#define MACE_REGISTER_DELEGATOR(registry, class_name, param_name, key)   \
+  void Register##class_name##Delegator(OpDelegatorRegistry *registry) {  \
+    registry->Register(                                                   \
+        key, OpDelegator::DefaultCreator<class_name, param_name>);        \
+  }
+#endif  // MACE_REGISTER_DELEGATOR
+
+#ifndef MACE_DEFINE_DELEGATOR_CREATOR
+#define MACE_DEFINE_DELEGATOR_CREATOR(class_name)                 \
+  static std::unique_ptr<class_name> Create(                      \
+      Workspace *workspace, const std::string &tag,               \
+      const DelegatorParam &param) {                              \
+    DelegatorCreator creator =                                     \
+        workspace->GetDelegatorRegistry()->GetCreator(tag);        \
+    std::unique_ptr<OpDelegator> delegator = creator(param);       \
+    return std::unique_ptr<class_name>(                            \
+        static_cast<class_name *>(delegator.release()));           \
+  }
#endif  // MACE_DEFINE_DELEGATOR_CREATOR
+
+#endif  // MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
diff --git a/mace/core/registry/op_registration_info.cc b/mace/core/registry/op_registration_info.cc
new file mode 100644
index 00000000..e0438976
--- /dev/null
+++ b/mace/core/registry/op_registration_info.cc
@@ -0,0 +1,69 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
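Taken together, the pieces above form the delegator plumbing. MACE_DELEGATOR_KEY and MACE_DELEGATOR_KEY_EX expand their arguments once before the *_TMP forms stringize the delegator name, device, implementation and optional tag, while the data type is resolved through DType<DT>::name_ at compile time; passing MACE_CPU_IMPL_TYPE for impl or a template parameter for DT therefore still yields the resolved key. With NEON enabled, MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE) evaluates to "Activation_CPU_NEON_float", the key that the CPU Activation op and its arm/fp32 kernel agree on later in this patch. A condensed sketch of the three macros working together; Foo and FooNeon are placeholder names, not classes added by this patch:

    // Sketch only: Foo/FooNeon stand in for a real delegator interface and
    // one backend implementation of it.
    #include "mace/core/ops/op_context.h"
    #include "mace/core/ops/op_delegator.h"
    #include "mace/core/registry/op_delegator_registry.h"
    #include "mace/core/workspace.h"

    namespace mace {

    // Interface the op codes against; Create() comes from the macro and pulls
    // the concrete implementation out of the workspace's delegator registry.
    class Foo : public OpDelegator {
     public:
      explicit Foo(const DelegatorParam &param) : OpDelegator(param) {}
      virtual ~Foo() = default;
      MACE_DEFINE_DELEGATOR_CREATOR(Foo)
      virtual MaceStatus Compute(const OpContext *context) = 0;
    };

    // One backend implementation, registered under "Foo_CPU_NEON_float".
    class FooNeon : public Foo {
     public:
      explicit FooNeon(const DelegatorParam &param) : Foo(param) {}
      MaceStatus Compute(const OpContext *context) override {
        MACE_UNUSED(context);
        return MaceStatus::MACE_SUCCESS;
      }
    };

    MACE_REGISTER_DELEGATOR(registry, FooNeon, DelegatorParam,
                            MACE_DELEGATOR_KEY(Foo, CPU, float, NEON))

    // At op-construction time the same key recovers the implementation:
    //   std::unique_ptr<Foo> foo = Foo::Create(
    //       context->workspace(),
    //       MACE_DELEGATOR_KEY(Foo, CPU, float, MACE_CPU_IMPL_TYPE),
    //       DelegatorParam());

    }  // namespace mace

The two-level *_TMP indirection exists purely so that macro arguments are expanded before stringizing; without it, #impl would produce the literal token MACE_CPU_IMPL_TYPE instead of NEON or REF.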
+ +#include "mace/core/registry/op_registration_info.h" + +#include +#include +#include +#include + +#include "mace/core/ops/op_condition_context.h" + +namespace mace { +OpRegistrationInfo::OpRegistrationInfo() { + // default device type placer + device_placer = [this](OpConditionContext *context) -> std::set { + MACE_UNUSED(context); + return this->devices; + }; + + // default input and output memory type setter + memory_type_setter = [](OpConditionContext *context) -> void { + if (context->device()->device_type() == DeviceType::GPU) { +#ifdef MACE_ENABLE_OPENCL + if (context->device()->gpu_runtime()->UseImageMemory()) { + context->set_output_mem_type(MemoryType::GPU_IMAGE); + } else { + context->set_output_mem_type(MemoryType::GPU_BUFFER); + } +#endif // MACE_ENABLE_OPENCL + } else { + context->set_output_mem_type(MemoryType::CPU_BUFFER); + } + }; + + data_format_selector = [](OpConditionContext *context) + -> std::vector { + DataFormat op_data_format = + static_cast( + ProtoArgHelper::GetOptionalArg( + *context->operator_def(), "data_format", + static_cast(DataFormat::NONE))); + return std::vector(context->operator_def()->input_size(), + op_data_format); + }; +} + +void OpRegistrationInfo::AddDevice(DeviceType device) { + devices.insert(device); +} + +void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) { + VLOG(3) << "Registering: " << key; + MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key); + creators[key] = std::move(creator); +} + +} // namespace mace diff --git a/mace/core/registry/op_registration_info.h b/mace/core/registry/op_registration_info.h new file mode 100644 index 00000000..ed110a3c --- /dev/null +++ b/mace/core/registry/op_registration_info.h @@ -0,0 +1,56 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#ifndef MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_ +#define MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_ + +#include +#include +#include +#include +#include + +#include "mace/core/ops/operator.h" +#include "mace/proto/mace.pb.h" + +namespace mace { +class OpConstructContext; +class OpConditionContext; + +class OpRegistrationInfo { + public: + typedef std::function(OpConstructContext *)> + OpCreator; + typedef std::function(OpConditionContext *)> + DevicePlacer; + typedef std::function MemoryTypeSetter; + typedef std::function(OpConditionContext *)> + DataFormatSelector; + + OpRegistrationInfo(); + + void AddDevice(DeviceType); + + void Register(const std::string &key, OpCreator creator); + + std::set devices; + std::unordered_map creators; + DevicePlacer device_placer; + MemoryTypeSetter memory_type_setter; + DataFormatSelector data_format_selector; +}; +} // namespace mace + +#endif // MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_ diff --git a/mace/core/registry/ops_registry.cc b/mace/core/registry/ops_registry.cc new file mode 100644 index 00000000..8a99c9e1 --- /dev/null +++ b/mace/core/registry/ops_registry.cc @@ -0,0 +1,149 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/registry/ops_registry.h" + +#include +#include +#include +#include +#include + +namespace mace { +namespace { +class OpKeyBuilder { + public: + explicit OpKeyBuilder(const std::string &op_name); + + OpKeyBuilder &Device(DeviceType device); + + OpKeyBuilder &TypeConstraint(const char *attr_name, + DataType allowed); + + const std::string Build(); + + private: + std::string op_name_; + DeviceType device_type_; + std::map type_constraint_; +}; + +OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {} + +OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { + device_type_ = device; + return *this; +} + +OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, + DataType allowed) { + type_constraint_[attr_name] = allowed; + return *this; +} + +const std::string OpKeyBuilder::Build() { + static const std::vector type_order = {"T"}; + std::stringstream ss; + ss << op_name_; + ss << device_type_; + for (auto type : type_order) { + ss << type << "_" << DataTypeToString(type_constraint_[type]); + } + + return ss.str(); +} +} // namespace + +MaceStatus OpRegistry::Register( + const std::string &op_type, + const DeviceType device_type, + const DataType dt, + OpRegistrationInfo::OpCreator creator) { + if (registry_.count(op_type) == 0) { + registry_[op_type] = std::unique_ptr( + new OpRegistrationInfo); + } + registry_[op_type]->AddDevice(device_type); + + std::string op_key = OpKeyBuilder(op_type) + .Device(device_type) + .TypeConstraint("T", dt) + .Build(); + registry_.at(op_type)->Register(op_key, creator); + return MaceStatus::MACE_SUCCESS; +} + +MaceStatus OpRegistry::Register( + const OpConditionBuilder &builder) { + std::string op_type = builder.type(); + if (registry_.count(op_type) == 0) { + 
registry_[op_type] = std::unique_ptr( + new OpRegistrationInfo); + } + builder.Finalize(registry_[op_type].get()); + return MaceStatus::MACE_SUCCESS; +} + +const std::set OpRegistry::AvailableDevices( + const std::string &op_type, OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + return registry_.at(op_type)->device_placer(context); +} + +void OpRegistry::GetInOutMemoryTypes( + const std::string &op_type, + OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered. op_type=", op_type); + return registry_.at(op_type)->memory_type_setter(context); +} + +const std::vector OpRegistry::InputsDataFormat( + const std::string &op_type, + OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + return registry_.at(op_type)->data_format_selector(context); +} + +std::unique_ptr OpRegistry::CreateOperation( + OpConstructContext *context, + DeviceType device_type) const { + auto operator_def = context->operator_def(); + DataType dtype = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def, "T", static_cast(DT_FLOAT))); + VLOG(1) << "Creating operator " << operator_def->name() << "(" + << operator_def->type() << "<" << dtype << ">" << ") on " + << device_type; + const std::string op_type = context->operator_def()->type(); + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + auto key_dtype = + (device_type == DeviceType::GPU && dtype == DT_HALF) ? DT_FLOAT : dtype; + std::string key = OpKeyBuilder(op_type) + .Device(device_type) + .TypeConstraint("T", key_dtype) + .Build(); + if (registry_.at(op_type)->creators.count(key) == 0) { + LOG(FATAL) << "Key not registered: " << key + << ", op type is: " << operator_def->type(); + } + return registry_.at(op_type)->creators.at(key)(context); +} + +} // namespace mace diff --git a/mace/core/registry/ops_registry.h b/mace/core/registry/ops_registry.h new file mode 100644 index 00000000..46476a64 --- /dev/null +++ b/mace/core/registry/ops_registry.h @@ -0,0 +1,99 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
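Registration and lookup meet in the op key: Register() stores the creator under OpKeyBuilder(op_type).Device(device).TypeConstraint("T", dt).Build(), and CreateOperation() rebuilds the same key from the OperatorDef's "T" argument, folding half back to float for GPU ops, so a creator is found only when the two triples match. A small sketch of that round trip with a hypothetical CpuFloatReluOp, which is not an op defined by this patch:

    // Sketch only; CpuFloatReluOp exists purely to make the round trip
    // concrete.
    #include "mace/core/registry/ops_registry.h"
    #include "mace/utils/macros.h"

    namespace mace {

    class CpuFloatReluOp : public Operation {
     public:
      explicit CpuFloatReluOp(OpConstructContext *context)
          : Operation(context) {}
      MaceStatus Run(OpContext *context) override {
        MACE_UNUSED(context);
        return MaceStatus::MACE_SUCCESS;
      }
    };

    void RegisterExampleOp(OpRegistry *op_registry) {
      // Stored under the key built from ("Relu", CPU, T=float); lookup later
      // rebuilds the identical key from the OperatorDef before calling the
      // stored creator.
      op_registry->Register("Relu", DeviceType::CPU, DT_FLOAT,
                            OpRegistry::DefaultCreator<CpuFloatReluOp>);
    }

    }  // namespace mace

The MACE_REGISTER_OP macro defined in the header below is the usual entry point and performs the same call with DataTypeToEnum<dt>::value filled in.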
+ + +#ifndef MACE_CORE_REGISTRY_OPS_REGISTRY_H_ +#define MACE_CORE_REGISTRY_OPS_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "mace/core/ops/operator.h" +#include "mace/core/ops/op_condition_builder.h" +#include "mace/core/ops/op_condition_context.h" +#include "mace/public/mace.h" +#include "mace/proto/mace.pb.h" +#include "mace/utils/memory.h" + +namespace mace { + +class OpRegistry { + public: + OpRegistry() = default; + virtual ~OpRegistry() = default; + MaceStatus Register(const std::string &op_type, + const DeviceType device_type, + const DataType dt, + OpRegistrationInfo::OpCreator creator); + + MaceStatus Register(const OpConditionBuilder &builder); + + const std::set AvailableDevices( + const std::string &op_type, OpConditionContext *context) const; + + void GetInOutMemoryTypes( + const std::string &op_type, OpConditionContext *context) const; + + const std::vector InputsDataFormat( + const std::string &op_type, OpConditionContext *context) const; + + std::unique_ptr CreateOperation( + OpConstructContext *context, + DeviceType device_type) const; + + template + static std::unique_ptr DefaultCreator( + OpConstructContext *context) { + return make_unique(context); + } + + private: + std::unordered_map> + registry_; + MACE_DISABLE_COPY_AND_ASSIGN(OpRegistry); +}; + +#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ + op_registry->Register(op_type, \ + device, \ + DataTypeToEnum
::value, \ + OpRegistry::DefaultCreator>) + +#define MACE_REGISTER_OP_BY_CLASS(\ + op_registry, op_type, class_name, device, dt) \ + op_registry->Register(op_type, \ + device, \ + DataTypeToEnum
::value, \ + OpRegistry::DefaultCreator) + +#ifdef MACE_ENABLE_OPENCL +#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \ + op_registry->Register( \ + op_type, \ + DeviceType::GPU, \ + DT_FLOAT, \ + OpRegistry::DefaultCreator>) +#else +#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) +#endif + +#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \ + op_registry->Register(builder) + +} // namespace mace + +#endif // MACE_CORE_REGISTRY_OPS_REGISTRY_H_ diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index fa9a5891..08bf59b0 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -46,7 +46,7 @@ bool HasHalfTensor(const NetDef &net_def) { return false; } -template +template void DequantizeTensor(Device *device, const unsigned char *model_data, const ConstTensor &const_tensor, @@ -66,7 +66,8 @@ void DequantizeTensor(Device *device, } // namespace -Workspace::Workspace() = default; +Workspace::Workspace(const OpDelegatorRegistry *registry) : + op_delegator_registry_(registry) {} Tensor *Workspace::CreateTensor(const std::string &name, Allocator *alloc, @@ -144,7 +145,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DataType dst_data_type = const_tensor.data_type(); if (device_type == DeviceType::CPU && - const_tensor.data_type() == DataType::DT_HALF) { + const_tensor.data_type() == DataType::DT_HALF) { dst_data_type = DataType::DT_FLOAT; } else if (!is_quantize_model && const_tensor.quantized()) { if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) { @@ -173,13 +174,13 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, if (device_type == DeviceType::CPU && const_tensor.data_type() == DataType::DT_HALF) { - // uncompress the weights of fp16 - auto org_data = reinterpret_cast( - model_data + const_tensor.offset()); - float *dst_data = tensor->mutable_data(); - for (int i = 0; i < const_tensor.data_size(); ++i) { - dst_data[i] = half_float::half_cast(org_data[i]); - } + // uncompress the weights of fp16 + auto org_data = reinterpret_cast( + model_data + const_tensor.offset()); + float *dst_data = tensor->mutable_data(); + for (int i = 0; i < const_tensor.data_size(); ++i) { + dst_data[i] = half_float::half_cast(org_data[i]); + } } else if (!is_quantize_model && const_tensor.quantized()) { // uncompress the weights of uint8 if (dst_data_type != DT_FLOAT) { @@ -401,4 +402,8 @@ void Workspace::RemoveTensor(const std::string &name) { } } +const OpDelegatorRegistry *Workspace::GetDelegatorRegistry() const { + return op_delegator_registry_; +} + } // namespace mace diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 4308f924..eae7ebd5 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -27,13 +27,14 @@ namespace mace { +class OpDelegatorRegistry; class MemoryOptimizer; class Workspace { public: typedef std::map> TensorMap; - Workspace(); + explicit Workspace(const OpDelegatorRegistry *registry); ~Workspace() {} Tensor *CreateTensor(const std::string &name, @@ -71,15 +72,16 @@ class Workspace { void RemoveTensor(const std::string &name); + const OpDelegatorRegistry *GetDelegatorRegistry() const; + private: TensorMap tensor_map_; - std::unique_ptr tensor_buffer_; - PreallocatedPooledAllocator preallocated_allocator_; - bool diffused_buffer_; + const OpDelegatorRegistry *op_delegator_registry_; + MACE_DISABLE_COPY_AND_ASSIGN(Workspace); }; diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index b9d3b13c..6ab855f4 100644 --- a/mace/libmace/mace.cc +++ 
b/mace/libmace/mace.cc @@ -19,8 +19,10 @@ #include "mace/core/device_context.h" #include "mace/core/memory_optimizer.h" #include "mace/core/net.h" -#include "mace/ops/registry/ops_registry.h" +#include "mace/core/registry/ops_registry.h" +#include "mace/core/registry/op_delegator_registry.h" #include "mace/ops/common/transpose.h" +#include "mace/ops/registry/registry.h" #include "mace/utils/math.h" #include "mace/utils/memory.h" #include "mace/utils/stl_util.h" @@ -451,7 +453,8 @@ class MaceEngine::Impl { private: std::unique_ptr model_data_; - std::unique_ptr op_registry_; + std::unique_ptr op_registry_; + std::unique_ptr op_delegator_registry_; DeviceType device_type_; std::unique_ptr device_; std::unique_ptr ws_; @@ -478,9 +481,10 @@ class MaceEngine::Impl { MaceEngine::Impl::Impl(const MaceEngineConfig &config) : model_data_(nullptr), op_registry_(new OpRegistry), + op_delegator_registry_(new OpDelegatorRegistry), device_type_(config.impl_->device_type()), device_(nullptr), - ws_(new Workspace()), + ws_(new Workspace(op_delegator_registry_.get())), net_(nullptr), is_quantized_model_(false), thread_pool_(new utils::ThreadPool(config.impl_->num_threads(), @@ -498,6 +502,8 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) #endif { LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion(); + ops::RegisterAllOps(op_registry_.get()); + ops::RegisterAllOpDelegators(op_delegator_registry_.get()); thread_pool_->Init(); if (device_type_ == DeviceType::CPU) { device_.reset(new CPUDevice(config.impl_->num_threads(), diff --git a/mace/ops/BUILD.bazel b/mace/ops/BUILD.bazel index 9861198a..52ad46ed 100644 --- a/mace/ops/BUILD.bazel +++ b/mace/ops/BUILD.bazel @@ -22,11 +22,13 @@ cc_library( srcs = glob( [ "common/*.cc", + "delegator/*.cc", ], ), hdrs = glob( [ "common/*.h", + "delegator/*.h", ], ), copts = [ @@ -58,12 +60,16 @@ cc_library( [ "ref/*.cc", ], - ), + ) + if_quantize_enabled(glob([ + "ref/q8/*.cc", + ])), hdrs = glob( [ "ref/*.h", ], - ), + ) + if_quantize_enabled(glob([ + "ref/q8/*.h", + ])), copts = [ "-Werror", "-Wextra", @@ -236,12 +242,12 @@ cc_library( cc_library( name = "ops", - srcs = [ - "registry/ops_registry.cc", - ], - hdrs = [ - "registry/ops_registry.h", - ], + srcs = glob([ + "registry/*.cc", + ]), + hdrs = glob([ + "registry/*.h", + ]), copts = [ "-Werror", "-Wextra", diff --git a/mace/ops/CMakeLists.txt b/mace/ops/CMakeLists.txt index 7994b445..7de9661d 100644 --- a/mace/ops/CMakeLists.txt +++ b/mace/ops/CMakeLists.txt @@ -1,6 +1,10 @@ file(GLOB OPS_COMMON_SRCS common/*.cc) file(GLOB OPS_REF_KERNELS_SRCS ref/*.cc) +file(GLOB OPS_REF_Q8_KERNELS_SRCS + ref/q8/*.cc +) + file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS arm/fp32/*.cc ) @@ -17,20 +21,23 @@ file(GLOB OPS_OPENCL_KERNELS_SRCS file(GLOB OPS_INTERNAL_OPS_SRCS *.cc) -set(OPS_SRCS registry/ops_registry.cc) +set(OPS_SRCS registry/ops_registry.cc registry/op_delegators_registry.cc) set(OPS_SRCS ${OPS_SRCS} ${OPS_COMMON_SRCS}) set(OPS_SRCS ${OPS_SRCS} ${OPS_INTERNAL_OPS_SRCS}) # TODO we need to remove this in production build set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_KERNELS_SRCS}) +if(MACE_ENABLE_QUANTIZE) + set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_Q8_KERNELS_SRCS}) +endif(MACE_ENABLE_QUANTIZE) + if(MACE_ENABLE_NEON) set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS}) + if(MACE_ENABLE_QUANTIZE) + set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS}) + endif(MACE_ENABLE_QUANTIZE) endif(MACE_ENABLE_NEON) -if(MACE_ENABLE_QUANTIZE) - set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS}) 
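On the engine side, MaceEngine::Impl now owns both registries, fills them once through ops::RegisterAllOps() and ops::RegisterAllOpDelegators(), and hands the delegator registry to the Workspace so that operations can look their delegators up at construction time. Stripped of the engine scaffolding, the wiring amounts to roughly the following sketch (lifetimes and error handling simplified):

    #include "mace/core/registry/op_delegator_registry.h"
    #include "mace/core/registry/ops_registry.h"
    #include "mace/core/workspace.h"
    #include "mace/ops/registry/registry.h"
    #include "mace/utils/memory.h"

    namespace mace {

    void BuildRegistries() {
      auto op_registry = make_unique<OpRegistry>();
      auto delegator_registry = make_unique<OpDelegatorRegistry>();
      ops::RegisterAllOps(op_registry.get());
      ops::RegisterAllOpDelegators(delegator_registry.get());

      // The workspace only borrows the delegator registry; ops reach it later
      // through Workspace::GetDelegatorRegistry() when creating delegators.
      Workspace ws(delegator_registry.get());
    }

    }  // namespace mace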
-endif(MACE_ENABLE_QUANTIZE) - if(MACE_ENABLE_OPENCL) set(OPS_SRCS ${OPS_SRCS} ${OPS_OPENCL_KERNELS_SRCS}) endif(MACE_ENABLE_OPENCL) diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index 25537056..338de7ea 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -17,13 +17,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#endif +#include "mace/ops/delegator/activation.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -37,19 +34,20 @@ namespace ops { template class ActivationOp; -template<> -class ActivationOp : public Operation { +template +class ActivationOp : public Operation { public: explicit ActivationOp(OpConstructContext *context) : Operation(context), activation_type_(ops::StringToActivationType( - Operation::GetOptionalArg("activation", - "NOOP"))), - activation_delegator_(activation_type_, - Operation::GetOptionalArg("max_limit", - 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + Operation::GetOptionalArg("activation", "NOOP"))), + activation_delegator_(delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, T, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + activation_type_, + Operation::GetOptionalArg("max_limit", 0), + Operation::GetOptionalArg("leakyrelu_coefficient", 0)))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -58,28 +56,24 @@ class ActivationOp : public Operation { if (activation_type_ == PRELU) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); - const float *input_ptr = input->data(); - float *output_ptr = output->mutable_data(); + const T *input_ptr = input->data(); + T *output_ptr = output->mutable_data(); MACE_CHECK(this->InputSize() > 1); const Tensor *alpha = this->Input(1); - const float *alpha_ptr = alpha->data(); + const T *alpha_ptr = alpha->data(); const index_t outer_size = output->dim(0); const index_t inner_size = output->dim(2) * output->dim(3); PReLUActivation(context, input_ptr, outer_size, input->dim(1), inner_size, alpha_ptr, output_ptr); } else { - activation_delegator_.Compute(context, input, output); + activation_delegator_->Compute(context, input, output); } return MaceStatus::MACE_SUCCESS; } private: ActivationType activation_type_; -#if defined(MACE_ENABLE_NEON) - arm::fp32::Activation activation_delegator_; -#else - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -122,7 +116,7 @@ class ActivationOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterActivation(OpRegistryBase *op_registry) { +void RegisterActivation(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Activation", ActivationOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Activation", ActivationOp); diff --git a/mace/ops/activation.h b/mace/ops/activation.h index 9ceae6e0..4003dd30 100644 --- a/mace/ops/activation.h +++ b/mace/ops/activation.h @@ -20,7 +20,7 @@ #include #include "mace/core/types.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/common/activation_type.h" #include "mace/utils/logging.h" diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 5b98ba85..8e5ce2e1 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -19,7 +19,8 @@ 
#include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/addn.h" @@ -92,7 +93,7 @@ class AddNOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterAddN(OpRegistryBase *op_registry) { +void RegisterAddN(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "AddN", AddNOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "AddN", AddNOp); MACE_REGISTER_OP_CONDITION( diff --git a/mace/ops/argmax.cc b/mace/ops/argmax.cc index 32007d6c..5ec9dc92 100644 --- a/mace/ops/argmax.cc +++ b/mace/ops/argmax.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -109,7 +110,7 @@ class ArgMaxOp : public Operation { -void RegisterArgMax(OpRegistryBase *op_registry) { +void RegisterArgMax(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ArgMax", ArgMaxOp, DeviceType::CPU, float); } diff --git a/mace/ops/arm/fp32/activation.cc b/mace/ops/arm/fp32/activation.cc index cac3badb..8c66bd56 100644 --- a/mace/ops/arm/fp32/activation.cc +++ b/mace/ops/arm/fp32/activation.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/activation.h" +#include "mace/ops/delegator/activation.h" #include #include @@ -22,16 +22,22 @@ namespace ops { namespace arm { namespace fp32 { -Activation::Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient) - : type_(type), - limit_(limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} +class Activation : public delegator::Activation { + public: + explicit Activation(const delegator::ActivationParam ¶m) + : delegator::Activation(param) {} + ~Activation() = default; + + MaceStatus Compute(const OpContext *context, + const Tensor *input, Tensor *output) override; + + private: + void DoActivation(const OpContext *context, + const Tensor *input, Tensor *output); +}; MaceStatus Activation::Compute(const OpContext *context, - const Tensor *input, - Tensor *output) { + const Tensor *input, Tensor *output) { Tensor::MappingGuard input_guard(input); if (input != output) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); @@ -139,7 +145,7 @@ void Activation::DoActivation(const OpContext *context, // remain for (index_t i = block_count * 4; i < size; ++i) { output_data[i] = std::max(input_data[i], 0.f) + - std::min(input_data[i], 0.f) * leakyrelu_coefficient_; + std::min(input_data[i], 0.f) * leakyrelu_coefficient_; } break; @@ -169,14 +175,19 @@ void Activation::DoActivation(const OpContext *context, break; } - case NOOP: + case NOOP: { break; + } - default: + default: { MACE_NOT_IMPLEMENTED; + } } } +MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam, + MACE_DELEGATOR_KEY(Activation, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/activation.h b/mace/ops/arm/fp32/activation.h deleted file mode 100644 index 265915d0..00000000 --- a/mace/ops/arm/fp32/activation.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_ARM_FP32_ACTIVATION_H_ -#define MACE_OPS_ARM_FP32_ACTIVATION_H_ - -#include "mace/core/op_context.h" -#include "mace/ops/common/activation_type.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Activation { - public: - explicit Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient); - ~Activation() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - Tensor *output); - - private: - void DoActivation(const OpContext *context, - const Tensor *input, - Tensor *output); - - ActivationType type_; - const float limit_; - const float leakyrelu_coefficient_; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_ACTIVATION_H_ diff --git a/mace/ops/arm/fp32/bias_add.cc b/mace/ops/arm/fp32/bias_add.cc index f572b22b..fc5a55b3 100644 --- a/mace/ops/arm/fp32/bias_add.cc +++ b/mace/ops/arm/fp32/bias_add.cc @@ -12,15 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/bias_add.h" - #include +#include "mace/ops/delegator/bias_add.h" namespace mace { namespace ops { namespace arm { namespace fp32 { +class BiasAdd : public delegator::BiasAdd { + public: + explicit BiasAdd(const DelegatorParam ¶m) : delegator::BiasAdd(param) {} + ~BiasAdd() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output) override; + + private: + void AddBias(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output); +}; + MaceStatus BiasAdd::Compute(const OpContext *context, const Tensor *input, const Tensor *bias, @@ -117,6 +129,9 @@ void BiasAdd::AddBias(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, BiasAdd, DelegatorParam, + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/bias_add.h b/mace/ops/arm/fp32/bias_add.h deleted file mode 100644 index a3e68491..00000000 --- a/mace/ops/arm/fp32/bias_add.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_BIAS_ADD_H_ -#define MACE_OPS_ARM_FP32_BIAS_ADD_H_ - -#include "mace/core/op_context.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class BiasAdd { - public: - BiasAdd() = default; - ~BiasAdd() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); - - private: - void AddBias(const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_BIAS_ADD_H_ diff --git a/mace/ops/arm/fp32/conv_2d.h b/mace/ops/arm/fp32/conv_2d.h index dc8d0eff..a143f5f8 100644 --- a/mace/ops/arm/fp32/conv_2d.h +++ b/mace/ops/arm/fp32/conv_2d.h @@ -18,36 +18,25 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/conv_2d.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Conv2dBase { +class Conv2dBase : public delegator::Conv2d { public: - Conv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2dBase(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} virtual ~Conv2dBase() = default; - virtual MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) = 0; - protected: void CalOutputShapeAndInputPadSize(const std::vector &input_shape, const std::vector &filter_shape, @@ -83,11 +72,6 @@ class Conv2dBase { const int pad_left, Tensor *dst); void UnPadOutput(const Tensor &src, Tensor *dst); - - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; }; } // namespace fp32 diff --git a/mace/ops/arm/fp32/conv_2d_1x1.cc b/mace/ops/arm/fp32/conv_2d_1x1.cc index d5e03652..0aad6be9 100644 --- a/mace/ops/arm/fp32/conv_2d_1x1.cc +++ b/mace/ops/arm/fp32/conv_2d_1x1.cc @@ -12,13 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/ops/arm/fp32/conv_2d_1x1.h" +#include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/arm/fp32/gemm.h" +#include "mace/ops/delegator/conv_2d.h" namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dK1x1 : public Conv2dBase { + public: + explicit Conv2dK1x1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param), + gemm_(delegator::GemmParam()) {} + virtual ~Conv2dK1x1() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; + + private: + Gemm gemm_; +}; + MaceStatus Conv2dK1x1::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -94,6 +113,9 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context, output); } +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_1x1.h b/mace/ops/arm/fp32/conv_2d_1x1.h deleted file mode 100644 index cde94ea0..00000000 --- a/mace/ops/arm/fp32/conv_2d_1x1.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ -#define MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/gemm.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dK1x1 : public Conv2dBase { - public: - Conv2dK1x1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} - virtual ~Conv2dK1x1() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; - - private: - Gemm gemm_; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ diff --git a/mace/ops/arm/fp32/conv_2d_1xn.cc b/mace/ops/arm/fp32/conv_2d_1xn.cc index 3be9e3eb..fc92091f 100644 --- a/mace/ops/arm/fp32/conv_2d_1xn.cc +++ b/mace/ops/arm/fp32/conv_2d_1xn.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -859,6 +861,19 @@ MaceStatus Conv2dK15x1S1::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x7S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x7S1)) + +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x1S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x1S1)) + +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x15S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + NEON, K1x15S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK15x1S1, delegator::Conv2dParam, + 
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + NEON, K15x1S1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_1xn.h b/mace/ops/arm/fp32/conv_2d_1xn.h index 0bdd6673..c0a6da63 100644 --- a/mace/ops/arm/fp32/conv_2d_1xn.h +++ b/mace/ops/arm/fp32/conv_2d_1xn.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_1XN_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK1x7S1 : public Conv2dBase { public: - Conv2dK1x7S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK1x7S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK1x7S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK1x7S1 : public Conv2dBase { class Conv2dK7x1S1 : public Conv2dBase { public: - Conv2dK7x1S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x1S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x1S1() {} MaceStatus Compute( @@ -54,8 +55,8 @@ class Conv2dK7x1S1 : public Conv2dBase { class Conv2dK1x15S1 : public Conv2dBase { public: - Conv2dK1x15S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK1x15S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK1x15S1() {} MaceStatus Compute( @@ -67,8 +68,8 @@ class Conv2dK1x15S1 : public Conv2dBase { class Conv2dK15x1S1 : public Conv2dBase { public: - Conv2dK15x1S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK15x1S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK15x1S1() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_2d_3x3.cc b/mace/ops/arm/fp32/conv_2d_3x3.cc index 95c30341..37d8ef84 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3.cc +++ b/mace/ops/arm/fp32/conv_2d_3x3.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -735,6 +737,11 @@ MaceStatus Conv2dK3x3S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S2, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_3x3.h b/mace/ops/arm/fp32/conv_2d_3x3.h index bd96501d..e64d061e 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3.h +++ b/mace/ops/arm/fp32/conv_2d_3x3.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_3X3_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK3x3S1 : public Conv2dBase { public: - Conv2dK3x3S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit 
Conv2dK3x3S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK3x3S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK3x3S1 : public Conv2dBase { class Conv2dK3x3S2 : public Conv2dBase { public: - Conv2dK3x3S2(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK3x3S2(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc b/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc index ab2517bf..cbdb7d66 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc +++ b/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc @@ -17,6 +17,7 @@ #include #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/conv_2d.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -800,6 +801,10 @@ void Conv2dK3x3Winograd::TransformOutput8x8(const OpContext *context, }, 0, batch, 1, 0, out_channels, 1); } +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3Winograd, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX( + Conv2d, CPU, float, NEON, K3x3Winograd)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_3x3_winograd.h b/mace/ops/arm/fp32/conv_2d_3x3_winograd.h index 53118a6a..ec4db81b 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3_winograd.h +++ b/mace/ops/arm/fp32/conv_2d_3x3_winograd.h @@ -18,11 +18,11 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/arm/fp32/gemm.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -31,10 +31,9 @@ namespace fp32 { class Conv2dK3x3Winograd : public Conv2dBase { public: - Conv2dK3x3Winograd(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type), - gemm_(), + explicit Conv2dK3x3Winograd(const delegator::Conv2dParam ¶m) + : Conv2dBase(param), + gemm_(delegator::GemmParam()), transformed_filter_(nullptr), out_tile_size_(0) {} diff --git a/mace/ops/arm/fp32/conv_2d_5x5.cc b/mace/ops/arm/fp32/conv_2d_5x5.cc index 1b41ec7c..cc117cf9 100644 --- a/mace/ops/arm/fp32/conv_2d_5x5.cc +++ b/mace/ops/arm/fp32/conv_2d_5x5.cc @@ -12,16 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. 
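Each specialized kernel registers itself under a tagged key built with MACE_DELEGATOR_KEY_EX, so MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1) becomes "Conv2d_CPU_NEON_K3x3S1_float", and Conv2dGeneral serves as the untagged-shape fallback. The CPU Conv2D op is then expected to compose the same tagged key when it picks a kernel; that selection code lives outside this hunk, so the sketch below only restates the key side and uses a made-up helper name:

    // Sketch only; TaggedConvKey is not a function from this patch, and the
    // real kernel selection in mace/ops/conv_2d.cc may differ in detail.
    #include <string>

    #include "mace/core/registry/op_delegator_registry.h"

    namespace mace {
    namespace ops {

    std::string TaggedConvKey(bool use_3x3_s1_kernel) {
      // With NEON enabled these match the MACE_REGISTER_DELEGATOR calls
      // made by the arm/fp32 kernels in this patch.
      return use_3x3_s1_kernel
                 ? MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1)
                 : MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, General);
    }

    }  // namespace ops
    }  // namespace mace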
-#include "mace/ops/arm/fp32/conv_2d_5x5.h" - #include #include +#include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dK5x5S1 : public Conv2dBase { + public: + explicit Conv2dK5x5S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} + virtual ~Conv2dK5x5S1() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; +}; + #define MACE_Conv2dNeonK5x5SnLoadCalc4 \ /* load filter (4 outch x 1 height x 4 width) */ \ float32x4_t vf00, vf10, vf20, vf30; \ @@ -244,6 +258,9 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK5x5S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K5x5S1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_5x5.h b/mace/ops/arm/fp32/conv_2d_5x5.h deleted file mode 100644 index b6fdf9bb..00000000 --- a/mace/ops/arm/fp32/conv_2d_5x5.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ -#define MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dK5x5S1 : public Conv2dBase { - public: - Conv2dK5x5S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} - virtual ~Conv2dK5x5S1() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; -}; - - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ diff --git a/mace/ops/arm/fp32/conv_2d_7x7.cc b/mace/ops/arm/fp32/conv_2d_7x7.cc index 4ee8a045..cc6963e7 100644 --- a/mace/ops/arm/fp32/conv_2d_7x7.cc +++ b/mace/ops/arm/fp32/conv_2d_7x7.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -720,6 +722,13 @@ MaceStatus Conv2dK7x7S3::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S2, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S2)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S3, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S3)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_7x7.h b/mace/ops/arm/fp32/conv_2d_7x7.h index 9324f4da..0d0467fc 100644 --- 
a/mace/ops/arm/fp32/conv_2d_7x7.h +++ b/mace/ops/arm/fp32/conv_2d_7x7.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_7X7_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK7x7S1 : public Conv2dBase { public: - Conv2dK7x7S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK7x7S1 : public Conv2dBase { class Conv2dK7x7S2 : public Conv2dBase { public: - Conv2dK7x7S2(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S2(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S2() {} MaceStatus Compute( @@ -54,8 +55,8 @@ class Conv2dK7x7S2 : public Conv2dBase { class Conv2dK7x7S3 : public Conv2dBase { public: - Conv2dK7x7S3(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({3, 3}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S3(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S3() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_general.cc b/mace/ops/arm/fp32/conv_general.cc index 25fb2441..2fdc57e2 100644 --- a/mace/ops/arm/fp32/conv_general.cc +++ b/mace/ops/arm/fp32/conv_general.cc @@ -12,15 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/conv_general.h" +#include "mace/ops/arm/fp32/conv_2d.h" #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dGeneral : public Conv2dBase { + public: + explicit Conv2dGeneral(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} + virtual ~Conv2dGeneral() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; +}; + MaceStatus Conv2dGeneral::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -237,6 +252,10 @@ MaceStatus Conv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, Conv2dGeneral, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_general.h b/mace/ops/arm/fp32/conv_general.h deleted file mode 100644 index 115acdb3..00000000 --- a/mace/ops/arm/fp32/conv_general.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_CONV_GENERAL_H_ -#define MACE_OPS_ARM_FP32_CONV_GENERAL_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dGeneral : public Conv2dBase { - public: - Conv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : Conv2dBase(strides, dilations, paddings, padding_type) {} - virtual ~Conv2dGeneral() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_GENERAL_H_ diff --git a/mace/ops/arm/fp32/deconv_2d.h b/mace/ops/arm/fp32/deconv_2d.h index 554f2935..128d5858 100644 --- a/mace/ops/arm/fp32/deconv_2d.h +++ b/mace/ops/arm/fp32/deconv_2d.h @@ -18,54 +18,27 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Deconv2dBase { +class Deconv2dBase : public delegator::Deconv2d { public: - Deconv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} - - Deconv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - 1, - framework_type) {} + explicit Deconv2dBase(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param), + group_(param.group_) {} virtual ~Deconv2dBase() = default; - virtual MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - const Tensor *output_shape, - Tensor *output) = 0; - protected: MaceStatus ResizeOutAndPadOut(const OpContext *context, const Tensor *input, @@ -78,13 +51,7 @@ class Deconv2dBase { void UnPadOutput(const Tensor &src, const std::vector &out_pad_size, Tensor *dst); - - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; index_t group_; - const FrameworkType framework_type_; }; } // namespace fp32 diff --git a/mace/ops/arm/fp32/deconv_2d_2x2.cc b/mace/ops/arm/fp32/deconv_2d_2x2.cc index c9d630bb..65cfc6e8 100644 --- a/mace/ops/arm/fp32/deconv_2d_2x2.cc +++ b/mace/ops/arm/fp32/deconv_2d_2x2.cc @@ -330,12 +330,18 @@ MaceStatus Deconv2dK2x2S2::Compute(const OpContext *context, } }, 0, batch, 1, 0, outch, 1); - UnPadOutput(*out_tensor, out_pad_size, output); return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K2x2S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + 
NEON, K2x2S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_2x2.h b/mace/ops/arm/fp32/deconv_2d_2x2.h index 05f80dec..6fd53344 100644 --- a/mace/ops/arm/fp32/deconv_2d_2x2.h +++ b/mace/ops/arm/fp32/deconv_2d_2x2.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK2x2S1 : public Deconv2dBase { public: - Deconv2dK2x2S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK2x2S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK2x2S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK2x2S1 : public Deconv2dBase { class Deconv2dK2x2S2 : public Deconv2dBase { public: - Deconv2dK2x2S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK2x2S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK2x2S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/deconv_2d_3x3.cc b/mace/ops/arm/fp32/deconv_2d_3x3.cc index b2ef6eae..55911e25 100644 --- a/mace/ops/arm/fp32/deconv_2d_3x3.cc +++ b/mace/ops/arm/fp32/deconv_2d_3x3.cc @@ -464,6 +464,13 @@ MaceStatus Deconv2dK3x3S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_3x3.h b/mace/ops/arm/fp32/deconv_2d_3x3.h index 4495cbe8..65cc23e6 100644 --- a/mace/ops/arm/fp32/deconv_2d_3x3.h +++ b/mace/ops/arm/fp32/deconv_2d_3x3.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK3x3S1 : public Deconv2dBase { public: - Deconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK3x3S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK3x3S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK3x3S1 : public Deconv2dBase { class Deconv2dK3x3S2 : public Deconv2dBase { public: - Deconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK3x3S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK3x3S2() {} MaceStatus Compute( diff 
--git a/mace/ops/arm/fp32/deconv_2d_4x4.cc b/mace/ops/arm/fp32/deconv_2d_4x4.cc index 3c47ecff..b2e17afa 100644 --- a/mace/ops/arm/fp32/deconv_2d_4x4.cc +++ b/mace/ops/arm/fp32/deconv_2d_4x4.cc @@ -449,7 +449,6 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context, const index_t outw = out_shape[3]; const index_t out_img_size = outh * outw; - utils::ThreadPool &thread_pool = context->device()->cpu_runtime()->thread_pool(); @@ -575,6 +574,13 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K4x4S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_4x4.h b/mace/ops/arm/fp32/deconv_2d_4x4.h index 9f09056a..bf86a62a 100644 --- a/mace/ops/arm/fp32/deconv_2d_4x4.h +++ b/mace/ops/arm/fp32/deconv_2d_4x4.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK4x4S1 : public Deconv2dBase { public: - Deconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK4x4S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK4x4S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK4x4S1 : public Deconv2dBase { class Deconv2dK4x4S2 : public Deconv2dBase { public: - Deconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK4x4S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK4x4S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/deconv_2d_general.cc b/mace/ops/arm/fp32/deconv_2d_general.cc index 47bfe39c..5ffe7b0d 100644 --- a/mace/ops/arm/fp32/deconv_2d_general.cc +++ b/mace/ops/arm/fp32/deconv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
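The deconvolution kernels follow the same scheme: Deconv2dBase now inherits the delegator::Deconv2d interface and reads strides, dilations, paddings, padding type, framework type (and the group count used by the grouped variants) from a single delegator::Deconv2dParam. A hedged sketch of obtaining the 4x4/stride-2 NEON kernel through the registry instead of constructing it directly; paddings_, padding_type_, model_type_ and the tensors stand in for the calling op's members and locals:

// Sketch only: resolve the registered kernel by key and construct it from a
// Deconv2dParam (strides, dilations, paddings, padding type, framework type).
delegator::Deconv2dParam param({2, 2}, {1, 1}, paddings_, padding_type_,
                               model_type_);
std::unique_ptr<delegator::Deconv2d> deconv2d =
    delegator::Deconv2d::Create(
        context->workspace(),
        MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, NEON, K4x4S2),
        param);
return deconv2d->Compute(context, input, filter, output_shape_tensor, output);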
-#include "mace/ops/arm/fp32/deconv_2d_general.h" +#include "mace/ops/arm/fp32/deconv_2d.h" // TODO(liutuo): optimize it @@ -21,6 +21,20 @@ namespace ops { namespace arm { namespace fp32 { +class Deconv2dGeneral : public Deconv2dBase { + public: + explicit Deconv2dGeneral(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} + virtual ~Deconv2dGeneral() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + const Tensor *output_shape, + Tensor *output) override; +}; + MaceStatus Deconv2dGeneral::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -110,6 +124,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dGeneral, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_general.h b/mace/ops/arm/fp32/deconv_2d_general.h deleted file mode 100644 index d11ada03..00000000 --- a/mace/ops/arm/fp32/deconv_2d_general.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ -#define MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ - -#include -#include - -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/types.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/deconv_2d.h" -#include "mace/ops/common/conv_pool_2d_util.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Deconv2dGeneral : public Deconv2dBase { - public: - Deconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - framework_type) {} - virtual ~Deconv2dGeneral() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - const Tensor *output_shape, - Tensor *output) override; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ diff --git a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc index a27827b4..8d77672b 100644 --- a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc +++ b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc @@ -512,6 +512,13 @@ MaceStatus DepthwiseConv2dK3x3S2::Compute(const mace::OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dK3x3S1, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dK3x3S2, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h index c130fbff..49412b80 100644 --- a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h +++ b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h @@ -16,10 +16,12 @@ #define MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,9 +30,8 @@ namespace fp32 { class DepthwiseConv2dK3x3S1 : public Conv2dBase { public: - DepthwiseConv2dK3x3S1(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam ¶m) + : Conv2dBase(param) {} virtual ~DepthwiseConv2dK3x3S1() {} MaceStatus Compute( @@ -42,9 +43,8 @@ class DepthwiseConv2dK3x3S1 : public Conv2dBase { class DepthwiseConv2dK3x3S2 : public Conv2dBase { public: - DepthwiseConv2dK3x3S2(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam ¶m) + : Conv2dBase(param) {} virtual ~DepthwiseConv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc index 3cd6d527..291075ae 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc @@ -776,6 +776,20 @@ MaceStatus GroupDeconv2dK3x3S2::Compute(const OpContext *context, return 
MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK3x3S1, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK3x3S2, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S2)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK3x3S1, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK3x3S2, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h index 5dd315a4..eeb21d6c 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,14 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase { public: - DepthwiseDeconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK3x3S1( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK3x3S1() {} MaceStatus Compute( @@ -52,14 +48,9 @@ class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase { class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase { public: - DepthwiseDeconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK3x3S2( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK3x3S2() {} MaceStatus Compute( @@ -72,16 +63,9 @@ class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase { class GroupDeconv2dK3x3S1 : public Deconv2dBase { public: - GroupDeconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK3x3S1( + const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK3x3S1() {} MaceStatus Compute( @@ -94,16 +78,8 @@ class GroupDeconv2dK3x3S1 : public Deconv2dBase { class GroupDeconv2dK3x3S2 : public Deconv2dBase { public: - GroupDeconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK3x3S2(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc index 85c93b0c..f9de2de3 100644 --- 
a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc @@ -959,6 +959,20 @@ MaceStatus GroupDeconv2dK4x4S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK4x4S1, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK4x4S2, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S2)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK4x4S1, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK4x4S2, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h index 4b73ed01..31d5bd99 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,14 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase { public: - DepthwiseDeconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK4x4S1( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK4x4S1() {} MaceStatus Compute( @@ -52,14 +48,9 @@ class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase { class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase { public: - DepthwiseDeconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK4x4S2( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK4x4S2() {} MaceStatus Compute( @@ -72,16 +63,8 @@ class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase { class GroupDeconv2dK4x4S1 : public Deconv2dBase { public: - GroupDeconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK4x4S1() {} MaceStatus Compute( @@ -94,16 +77,8 @@ class GroupDeconv2dK4x4S1 : public Deconv2dBase { class GroupDeconv2dK4x4S2 : public Deconv2dBase { public: - GroupDeconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK4x4S2() 
{} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc index a45d5acc..81d715e2 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc @@ -207,6 +207,14 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dGeneral, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, General)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dGeneral, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_general.h b/mace/ops/arm/fp32/depthwise_deconv_2d_general.h index d73480c5..92492449 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_general.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_general.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,16 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dGeneral : public Deconv2dBase { public: - DepthwiseDeconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dGeneral( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dGeneral() {} MaceStatus Compute( @@ -54,18 +48,8 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase { class GroupDeconv2dGeneral : public Deconv2dBase { public: - GroupDeconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dGeneral() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/gemm.cc b/mace/ops/arm/fp32/gemm.cc index ff26052f..ca429e63 100644 --- a/mace/ops/arm/fp32/gemm.cc +++ b/mace/ops/arm/fp32/gemm.cc @@ -1224,6 +1224,9 @@ MaceStatus Gemm::Compute(const OpContext *context, output); } +MACE_REGISTER_DELEGATOR(registry, Gemm, delegator::GemmParam, + MACE_DELEGATOR_KEY(Gemm, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/gemm.h b/mace/ops/arm/fp32/gemm.h index 00b4d80e..4910ae35 100644 --- a/mace/ops/arm/fp32/gemm.h +++ b/mace/ops/arm/fp32/gemm.h @@ -15,10 +15,11 @@ #ifndef MACE_OPS_ARM_FP32_GEMM_H_ #define MACE_OPS_ARM_FP32_GEMM_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/matrix.h" +#include "mace/ops/delegator/gemm.h" +#include "mace/public/mace.h" #include "mace/utils/math.h" // This implements matrix-matrix 
multiplication. @@ -29,13 +30,12 @@ namespace ops { namespace arm { namespace fp32 { -class Gemm { +class Gemm : public delegator::Gemm { public: - explicit Gemm(const bool should_cache_pack) - : pack_cache_(GetCPUAllocator()), - should_cache_pack_(should_cache_pack), + explicit Gemm(const delegator::GemmParam ¶m) + : delegator::Gemm(param), pack_cache_(GetCPUAllocator()), + should_cache_pack_(param.should_cache_pack_), cached_(0) {} - Gemm() : Gemm(false) {} ~Gemm() {} MaceStatus Compute( @@ -51,7 +51,7 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; // Original matrix before transpose has row-major MaceStatus Compute( @@ -68,7 +68,7 @@ class Gemm { const bool transpose_out, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; private: void ComputeBlock(const float *packed_lhs_data, diff --git a/mace/ops/arm/fp32/gemv.cc b/mace/ops/arm/fp32/gemv.cc index 2f2866cf..317e4224 100644 --- a/mace/ops/arm/fp32/gemv.cc +++ b/mace/ops/arm/fp32/gemv.cc @@ -378,6 +378,10 @@ MaceStatus Gemv::Compute(const OpContext *context, #undef vaddvq_f32 #endif + +MACE_REGISTER_DELEGATOR(registry, Gemv, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/gemv.h b/mace/ops/arm/fp32/gemv.h index 1f406426..9933cf42 100644 --- a/mace/ops/arm/fp32/gemv.h +++ b/mace/ops/arm/fp32/gemv.h @@ -15,18 +15,19 @@ #ifndef MACE_OPS_ARM_FP32_GEMV_H_ #define MACE_OPS_ARM_FP32_GEMV_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -39,7 +40,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; } // namespace fp32 diff --git a/mace/ops/arm/q8/eltwise.cc b/mace/ops/arm/q8/eltwise.cc index bdaa57a6..74d44104 100644 --- a/mace/ops/arm/q8/eltwise.cc +++ b/mace/ops/arm/q8/eltwise.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
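Gemm and Gemv move onto the same interface: the fp32 Gemm derives from delegator::Gemm and takes its should_cache_pack_ flag from delegator::GemmParam, while the q8 Gemv template registers one delegator per output type (uint8_t and int32_t). A hedged sketch of creating both through the registry; the GemmParam constructor argument is an assumption here, so check mace/ops/delegator/gemm.h for the actual signature:

// Assumed: GemmParam's constructor takes the should_cache_pack flag.
std::unique_ptr<delegator::Gemm> gemm = delegator::Gemm::Create(
    context->workspace(),
    MACE_DELEGATOR_KEY(Gemm, CPU, float, MACE_CPU_IMPL_TYPE),
    delegator::GemmParam(true));

// Gemv needs no extra parameters, so the plain DelegatorParam is enough.
std::unique_ptr<delegator::Gemv> gemv = delegator::Gemv::Create(
    context->workspace(),
    MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE),
    DelegatorParam());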
-#include "mace/ops/arm/q8/eltwise.h" - #include #include #include "mace/ops/common/gemmlowp_util.h" +#include "mace/ops/delegator/eltwise.h" #include "mace/utils/logging.h" namespace mace { @@ -25,6 +24,16 @@ namespace ops { namespace arm { namespace q8 { +class Eltwise : public delegator::Eltwise { + public: + explicit Eltwise(const delegator::EltwiseParam ¶m) + : delegator::Eltwise(param) {} + ~Eltwise() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) override; +}; + MaceStatus Eltwise::Compute(const OpContext *context, const Tensor *input0, const Tensor *input1, @@ -144,7 +153,7 @@ MaceStatus Eltwise::Compute(const OpContext *context, gemmlowp::SaturatingRoundingDoublingHighMul( res, output_multiplier), -output_shift) + - output->zero_point(); + output->zero_point(); output_ptr[i] = Saturate(output_val); } }, @@ -153,6 +162,9 @@ MaceStatus Eltwise::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Eltwise, delegator::EltwiseParam, + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, NEON)) + } // namespace q8 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/q8/eltwise.h b/mace/ops/arm/q8/eltwise.h deleted file mode 100644 index 200b13cb..00000000 --- a/mace/ops/arm/q8/eltwise.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This implements matrix-vector multiplication described as -// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt - -#ifndef MACE_OPS_ARM_Q8_ELTWISE_H_ -#define MACE_OPS_ARM_Q8_ELTWISE_H_ - -#include "mace/core/op_context.h" -#include "mace/core/types.h" -#include "mace/ops/common/eltwise_type.h" - -namespace mace { -namespace ops { -namespace arm { -namespace q8 { - -class Eltwise { - public: - explicit Eltwise(const EltwiseType type) : type_(type) {} - - MaceStatus Compute(const OpContext *context, - const Tensor *input0, - const Tensor *input1, - Tensor *output); - - private: - EltwiseType type_; -}; - -} // namespace q8 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_Q8_ELTWISE_H_ diff --git a/mace/ops/arm/q8/gemv.cc b/mace/ops/arm/q8/gemv.cc index 388c6814..11290d35 100644 --- a/mace/ops/arm/q8/gemv.cc +++ b/mace/ops/arm/q8/gemv.cc @@ -181,6 +181,14 @@ class Gemv; template class Gemv; +typedef Gemv GemvUint8; +MACE_REGISTER_DELEGATOR(registry, GemvUint8, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, NEON)) + +typedef Gemv GemvInt32; +MACE_REGISTER_DELEGATOR(registry, GemvInt32, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, int32_t, NEON)) + } // namespace q8 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/q8/gemv.h b/mace/ops/arm/q8/gemv.h index 21a27579..c9b98a07 100644 --- a/mace/ops/arm/q8/gemv.h +++ b/mace/ops/arm/q8/gemv.h @@ -1,4 +1,4 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. 
All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This implements matrix-vector multiplication described as -// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt - #ifndef MACE_OPS_ARM_Q8_GEMV_H_ #define MACE_OPS_ARM_Q8_GEMV_H_ -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" namespace mace { namespace ops { @@ -28,11 +23,11 @@ namespace arm { namespace q8 { template -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() : is_output_type_uint8_( - DataTypeToEnum::value == DataType::DT_UINT8) { - } + explicit Gemv(const DelegatorParam ¶m) + : delegator::Gemv(param), is_output_type_uint8_( + DataTypeToEnum::value == DataType::DT_UINT8) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -45,7 +40,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; private: bool is_output_type_uint8_; diff --git a/mace/ops/arm/q8/quantize.cc b/mace/ops/arm/q8/quantize.cc index 9c80dcbc..4a8d402b 100644 --- a/mace/ops/arm/q8/quantize.cc +++ b/mace/ops/arm/q8/quantize.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/core/quantize.h" @@ -106,12 +107,12 @@ class DequantizeOp : public Operation { QuantizeUtil quantize_util_; }; -void RegisterQuantize(OpRegistryBase *op_registry) { +void RegisterQuantize(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Quantize", QuantizeOp, DeviceType::CPU, uint8_t); } -void RegisterDequantize(OpRegistryBase *op_registry) { +void RegisterDequantize(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp, DeviceType::CPU, uint8_t); MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp, diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index a27e46c5..88c9a179 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -16,14 +16,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" - -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#endif +#include "mace/ops/delegator/activation.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -45,11 +41,16 @@ class BatchNormOp : public Operation { epsilon_(Operation::GetOptionalArg("epsilon", static_cast(1e-4))), activation_delegator_( - ops::StringToActivationType( - Operation::GetOptionalArg("activation", "NOOP")), - Operation::GetOptionalArg("max_limit", 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + ops::StringToActivationType( + Operation::GetOptionalArg("activation", + "NOOP")), + Operation::GetOptionalArg("max_limit", 0.0f), + Operation::GetOptionalArg("leakyrelu_coefficient", + 0.0f)))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -142,18 +143,14 @@ class 
BatchNormOp : public Operation { }, 0, batch, 1, 0, channels, 1); } - activation_delegator_.Compute(context, output, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: float epsilon_; -#ifdef MACE_ENABLE_NEON - arm::fp32::Activation activation_delegator_; -#else - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; protected: MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR); @@ -232,7 +229,7 @@ class BatchNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterBatchNorm(OpRegistryBase *op_registry) { +void RegisterBatchNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BatchNorm", BatchNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "BatchNorm", BatchNormOp); diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 937387fc..90324cd7 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/batch_to_space.h" #endif // MACE_ENABLE_OPENCL @@ -285,7 +286,7 @@ class BatchToSpaceNDOp : public BatchToSpaceOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterBatchToSpaceND(OpRegistryBase *op_registry) { +void RegisterBatchToSpaceND(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BatchToSpaceND", BatchToSpaceNDOp, DeviceType::CPU, float); diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index c17a6e49..54a0f271 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -16,14 +16,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" - -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/bias_add.h" -#else -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON +#include "mace/ops/delegator/bias_add.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -42,8 +38,11 @@ class BiasAddOp : public Operation { public: explicit BiasAddOp(OpConstructContext *context) : Operation(context), - has_data_format_(Operation::GetOptionalArg("has_data_format", - 0)) {} + has_data_format_(Operation::GetOptionalArg("has_data_format", 0)), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -56,7 +55,7 @@ class BiasAddOp : public Operation { MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2, "bias must be 1-dimensional or n*c for caffee.", MakeString(bias->shape())); - bias_add_delegator_.Compute(context, input, bias, output); + bias_add_delegator_->Compute(context, input, bias, output); } else { // NHWC MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2, "bias must be 1 or 2 dimensionals for caffee.", @@ -115,11 +114,7 @@ class BiasAddOp : public Operation { private: int has_data_format_; -#ifdef MACE_ENABLE_NEON - arm::fp32::BiasAdd bias_add_delegator_; -#else - ref::BiasAdd bias_add_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr bias_add_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -164,7 +159,7 @@ class BiasAddOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterBiasAdd(OpRegistryBase *op_registry) { +void 
RegisterBiasAdd(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BiasAdd", BiasAddOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "BiasAdd", BiasAddOp); diff --git a/mace/ops/cast.cc b/mace/ops/cast.cc index 940959a9..dfa42a76 100644 --- a/mace/ops/cast.cc +++ b/mace/ops/cast.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #include @@ -54,7 +55,7 @@ class CastOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterCast(OpRegistryBase *op_registry) { +void RegisterCast(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Cast", CastOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Cast", CastOp, diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index a7fababb..cddda38d 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/channel_shuffle.h" #endif // MACE_ENABLE_OPENCL @@ -98,7 +99,7 @@ class ChannelShuffleOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterChannelShuffle(OpRegistryBase *op_registry) { +void RegisterChannelShuffle(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ChannelShuffle", ChannelShuffleOp, DeviceType::CPU, float); diff --git a/mace/ops/common/lstm.h b/mace/ops/common/lstm.h index d9e40248..a22094e5 100644 --- a/mace/ops/common/lstm.h +++ b/mace/ops/common/lstm.h @@ -15,8 +15,8 @@ #ifndef MACE_OPS_COMMON_LSTM_H_ #define MACE_OPS_COMMON_LSTM_H_ +#include "mace/core/ops/op_context.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" namespace mace { namespace ops { diff --git a/mace/ops/common/transpose.h b/mace/ops/common/transpose.h index 6a70133c..8ff72df6 100644 --- a/mace/ops/common/transpose.h +++ b/mace/ops/common/transpose.h @@ -20,7 +20,7 @@ #endif // MACE_ENABLE_NEON #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/public/mace.h" namespace mace { diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index aff95a2e..65f05fdc 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/quantize.h" #include "mace/utils/memory.h" @@ -221,7 +222,7 @@ class ConcatOp : public ConcatOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterConcat(OpRegistryBase *op_registry) { +void RegisterConcat(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, float); diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index c2666d07..83da3f85 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -24,32 +24,18 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/conv_2d.h" #include 
"mace/utils/memory.h" #include "mace/utils/math.h" -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/conv_2d.h" -#include "mace/ops/arm/fp32/conv_2d_1x1.h" -#include "mace/ops/arm/fp32/conv_2d_3x3.h" -#include "mace/ops/arm/fp32/conv_2d_3x3_winograd.h" -#include "mace/ops/arm/fp32/conv_2d_5x5.h" -#include "mace/ops/arm/fp32/conv_2d_7x7.h" -#include "mace/ops/arm/fp32/conv_2d_1xn.h" -#include "mace/ops/arm/fp32/conv_general.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON - -#include "mace/ops/ref/conv_2d.h" - #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/common/gemmlowp_util.h" #include "mace/ops/arm/q8/quantization_util.h" @@ -72,13 +58,21 @@ class Conv2dOp : public ConvPool2dOpBase { public: explicit Conv2dOp(OpConstructContext *context) : ConvPool2dOpBase(context), - activation_delegator_(ops::StringToActivationType( - Operation::GetOptionalArg("activation", - "NOOP")), - Operation::GetOptionalArg("max_limit", - 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + ops::StringToActivationType( + Operation::GetOptionalArg("activation", + "NOOP")), + Operation::GetOptionalArg("max_limit", 0.0f), + Operation::GetOptionalArg("leakyrelu_coefficient", + 0.0f)))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(INPUT); @@ -86,116 +80,100 @@ class Conv2dOp : public ConvPool2dOpBase { const Tensor *bias = this->InputSize() >= 3 ? this->Input(BIAS) : nullptr; Tensor *output = this->Output(OUTPUT); -#ifdef MACE_ENABLE_NEON - // the following params are used to decide which conv delegator to use - const index_t stride_h = strides_[0]; - const index_t stride_w = strides_[1]; - const index_t dilation_h = dilations_[0]; - const index_t dilation_w = dilations_[1]; - const index_t filter_h = filter->dim(2); - const index_t filter_w = filter->dim(3); - const index_t input_channels = input->dim(1); - const index_t channels = filter->dim(0); - - // NOTE: delegator is fixed after first round of running, - // although winograd depends on input params. - // We do not support changeable filter for now. 
if (conv2d_delegator_ == nullptr) { - if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 - && dilation_h == 1 && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 3 && filter_w == 3 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - if (input_channels >= 8 && channels >= 8) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); + std::string tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + // the following params are used to decide which conv delegator to use + const index_t stride_h = strides_[0]; + const index_t stride_w = strides_[1]; + const index_t dilation_h = dilations_[0]; + const index_t dilation_w = dilations_[1]; + const index_t filter_h = filter->dim(2); + const index_t filter_w = filter->dim(3); + const index_t input_channels = input->dim(1); + const index_t channels = filter->dim(0); + // NOTE: delegator is fixed after first round of running, + // although winograd depends on input params. + // We do not support changeable filter for now. + if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x1); + } else if (filter_h == 3 && filter_w == 3 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + if (input_channels >= 8 && channels >= 8) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3Winograd); + } else { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } + } else if (filter_h == 3 && filter_w == 3 + && stride_h == 2 && stride_w == 2 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (filter_h == 5 && filter_w == 5 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K5x5S1); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S1); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 2 && stride_w == 2 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S2); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 3 && stride_w == 3 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S3); + } else if (filter_h == 1 && filter_w == 7 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x7S1); + } else if (filter_h == 7 && filter_w == 1 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x1S1); + } else if (filter_h == 1 && filter_w == 15 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x15S1); + } else if (filter_h == 15 && filter_w == 1 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = 
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K15x1S1); } - } else if (filter_h == 3 && filter_w == 3 - && stride_h == 2 && stride_w == 2 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 5 && filter_w == 5 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 2 && stride_w == 2 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 3 && stride_w == 3 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 1 && filter_w == 7 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 1 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 1 && filter_w == 15 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 15 && filter_w == 1 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else { - conv2d_delegator_ = make_unique( - strides_, - dilations_, - paddings_, - padding_type_); } + delegator::Conv2dParam param(strides_, dilations_, + paddings_, padding_type_); + conv2d_delegator_ = delegator::Conv2d::Create(context->workspace(), + tag, param); } conv2d_delegator_->Compute(context, input, filter, output); -#else - if (ref_conv2d_delegator_ == nullptr) { - ref_conv2d_delegator_ = make_unique>(strides_, - dilations_, - paddings_, - padding_type_); - } - ref_conv2d_delegator_->Compute(context, input, filter, output); -#endif - - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr conv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - std::unique_ptr> ref_conv2d_delegator_; - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr conv2d_delegator_; private: MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); @@ -518,7 +496,7 @@ class Conv2dOp : public ConvPool2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterConv2D(OpRegistryBase *op_registry) { +void RegisterConv2D(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index b5ad48ae..8d96532d 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -17,7 +17,7 @@ #include 
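Condensing the new Run() logic above: Conv2dOp no longer instantiates a concrete arm::fp32 class. It derives a delegator key from the filter, stride and dilation shape (falling back to the General key, and to the non-NEON implementation when MACE_CPU_IMPL_TYPE is not NEON), builds one Conv2dParam, and asks the registry for the kernel exactly once. A trimmed illustration with only one specialization kept:

// Trimmed from Conv2dOp::Run; only the 1x1 specialization is shown.
if (conv2d_delegator_ == nullptr) {
  std::string tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
                                          MACE_CPU_IMPL_TYPE, General);
  if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 &&
      dilation_h == 1 && dilation_w == 1) {
    tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, MACE_CPU_IMPL_TYPE, K1x1);
  }
  delegator::Conv2dParam param(strides_, dilations_, paddings_, padding_type_);
  conv2d_delegator_ = delegator::Conv2d::Create(context->workspace(),
                                                tag, param);
}
conv2d_delegator_->Compute(context, input, filter, output);
bias_add_delegator_->Compute(context, output, bias, output);
activation_delegator_->Compute(context, output, output);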
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/ops/common/conv_pool_2d_util.h" namespace mace { diff --git a/mace/ops/crop.cc b/mace/ops/crop.cc index acaa73f1..5be82345 100644 --- a/mace/ops/crop.cc +++ b/mace/ops/crop.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #include "mace/utils/memory.h" #ifdef MACE_ENABLE_OPENCL @@ -132,7 +133,7 @@ class CropOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterCrop(OpRegistryBase *op_registry) { +void RegisterCrop(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Crop", CropOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Crop", CropOp); diff --git a/mace/ops/cumsum.cc b/mace/ops/cumsum.cc index 302fdfd5..b1cb58f0 100644 --- a/mace/ops/cumsum.cc +++ b/mace/ops/cumsum.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -141,7 +142,7 @@ class CumsumOp : public Operation { bool checked_; }; -void RegisterCumsum(OpRegistryBase *op_registry) { +void RegisterCumsum(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Cumsum", CumsumOp, DeviceType::CPU, float); } diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 98298e0c..1e68449b 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -14,20 +14,6 @@ #include "mace/ops/deconv_2d.h" -#if defined(MACE_ENABLE_NEON) -#include -#include "mace/ops/arm/fp32/deconv_2d_2x2.h" -#include "mace/ops/arm/fp32/deconv_2d_3x3.h" -#include "mace/ops/arm/fp32/deconv_2d_4x4.h" -#include "mace/ops/arm/fp32/deconv_2d_general.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/bias_add.h" -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/deconv_2d.h" -#endif - #include #include #include @@ -35,9 +21,13 @@ #include #include "mace/core/future.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/deconv_2d.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -49,6 +39,10 @@ namespace mace { namespace ops { +namespace { +const std::vector kDeconv2dStrides = {1, 1}; +} + template class Deconv2dOp; @@ -57,9 +51,16 @@ class Deconv2dOp : public Deconv2dOpBase { public: explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(0); @@ -79,91 +80,67 @@ class Deconv2dOp : public Deconv2dOpBase { MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); -#ifdef MACE_ENABLE_NEON - const index_t kernel_h = filter->dim(2); - const index_t kernel_w = filter->dim(3); - bool use_neon_2x2_s1 = kernel_h == 
kernel_w && kernel_h == 2 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 && - strides_[0] == strides_[1] && strides_[0] == 2; + if (deconv2d_delegator_ == nullptr) { + std::string tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t kernel_h = filter->dim(2); + const index_t kernel_w = filter->dim(3); - bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_2x2_s1 = kernel_h == kernel_w && kernel_h == 2 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 && + strides_[0] == strides_[1] && strides_[0] == 2; - bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 2; - if (deconv2d_delegator_ == nullptr) { - if (use_neon_2x2_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_2x2_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else { - deconv2d_delegator_ = - make_unique(strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - model_type_); + bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 2; + + if (use_neon_2x2_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K2x2S1); + } else if (use_neon_2x2_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K2x2S2); + } else if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } } + delegator::Deconv2dParam param(strides_, kDeconv2dStrides, paddings_, + padding_type_, model_type_); + deconv2d_delegator_ = delegator::Deconv2d::Create(context->workspace(), + tag, param); } - deconv2d_delegator_->Compute(context, - input, - filter, - output_shape_tensor, - output); -#else - if (deconv2d_delegator_ == nullptr) { - deconv2d_delegator_ = make_unique>(strides_, - std::vector{ - 1, 1}, - paddings_, - 
padding_type_, - model_type_); - } - deconv2d_delegator_->Compute(context, - input, - filter, - output_shape_tensor, - output); - -#endif // MACE_ENABLE_NEON - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + deconv2d_delegator_->Compute(context, input, filter, + output_shape_tensor, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr deconv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; - std::unique_ptr> deconv2d_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr deconv2d_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -258,7 +235,7 @@ class Deconv2dOp : public Deconv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterDeconv2D(OpRegistryBase *op_registry) { +void RegisterDeconv2D(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Deconv2D", Deconv2dOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Deconv2D", Deconv2dOp); diff --git a/mace/ops/deconv_2d.h b/mace/ops/deconv_2d.h index 50a2ecee..a11d5f8a 100644 --- a/mace/ops/deconv_2d.h +++ b/mace/ops/deconv_2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/types.h" #include "mace/ops/activation.h" #include "mace/ops/common/conv_pool_2d_util.h" diff --git a/mace/ops/delegator/activation.h b/mace/ops/delegator/activation.h new file mode 100644 index 00000000..80a9c6b3 --- /dev/null +++ b/mace/ops/delegator/activation.h @@ -0,0 +1,61 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_OPS_DELEGATOR_ACTIVATION_H_ +#define MACE_OPS_DELEGATOR_ACTIVATION_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/activation_type.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct ActivationParam : public DelegatorParam { + explicit ActivationParam(ActivationType type, const float limit, + const float leakyrelu_coefficient) + : type_(type), limit_(limit), + leakyrelu_coefficient_(leakyrelu_coefficient) {} + + ActivationType type_; + const float limit_; + const float leakyrelu_coefficient_; +}; + +class Activation : public OpDelegator { + public: + explicit Activation(const ActivationParam ¶m) + : OpDelegator(param), type_(param.type_), limit_(param.limit_), + leakyrelu_coefficient_(param.leakyrelu_coefficient_) {} + virtual ~Activation() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Activation) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + Tensor *output) = 0; + + protected: + ActivationType type_; + const float limit_; + const float leakyrelu_coefficient_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_ACTIVATION_H_ diff --git a/mace/ops/delegator/bias_add.h b/mace/ops/delegator/bias_add.h new file mode 100644 index 00000000..f5fdea0d --- /dev/null +++ b/mace/ops/delegator/bias_add.h @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_OPS_DELEGATOR_BIAS_ADD_H_ +#define MACE_OPS_DELEGATOR_BIAS_ADD_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { +namespace delegator { + +class BiasAdd : public OpDelegator { + public: + explicit BiasAdd(const DelegatorParam ¶m) : OpDelegator(param) {} + virtual ~BiasAdd() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(BiasAdd) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *bias, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_BIAS_ADD_H_ diff --git a/mace/ops/delegator/conv_2d.h b/mace/ops/delegator/conv_2d.h new file mode 100644 index 00000000..9ff85f6d --- /dev/null +++ b/mace/ops/delegator/conv_2d.h @@ -0,0 +1,90 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MACE_OPS_DELEGATOR_CONV_2D_H_ +#define MACE_OPS_DELEGATOR_CONV_2D_H_ + +#include + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/conv_pool_2d_util.h" + +namespace mace { +namespace ops { + +enum ConvType { + General, + K1x1, + K1x7S1, + K7x1S1, + K1x15S1, + K15x1S1, + K3x3S1, + K3x3S2, + K3x3Winograd, + K5x5S1, + K7x7S1, + K7x7S2, + K7x7S3, +}; + +namespace delegator { + +struct Conv2dParam : public DelegatorParam { + explicit Conv2dParam(const std::vector &strides, + const std::vector &dilations, + const std::vector &paddings, + const Padding padding_type) + : strides_(strides), dilations_(dilations), + paddings_(paddings), padding_type_(padding_type) {} + + const std::vector &strides_; + const std::vector &dilations_; + const std::vector &paddings_; + const Padding padding_type_; +}; + +class Conv2d : public OpDelegator { + public: + explicit Conv2d(const delegator::Conv2dParam ¶m) + : OpDelegator(param), + strides_(param.strides_), + dilations_(param.dilations_), + paddings_(param.paddings_), + padding_type_(param.padding_type_) {} + virtual ~Conv2d() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Conv2d) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) = 0; + + protected: + const std::vector strides_; + const std::vector dilations_; + const std::vector paddings_; + const Padding padding_type_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_CONV_2D_H_ + diff --git a/mace/ops/delegator/deconv_2d.h b/mace/ops/delegator/deconv_2d.h new file mode 100644 index 00000000..856f3595 --- /dev/null +++ b/mace/ops/delegator/deconv_2d.h @@ -0,0 +1,95 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
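For reference, a concrete backend implements this Conv2d interface by subclassing it and overriding Compute(); the sketch below is illustrative only (the class name is hypothetical, and the registration macro provided by mace/core/registry/op_delegator_registry.h is only referenced in a comment, since its definition is not part of this hunk):

    #include "mace/ops/delegator/conv_2d.h"

    // Hypothetical NEON fp32 3x3/stride-1 kernel behind the delegator interface.
    class Conv2dK3x3S1 : public mace::ops::delegator::Conv2d {
     public:
      explicit Conv2dK3x3S1(const mace::ops::delegator::Conv2dParam &param)
          : mace::ops::delegator::Conv2d(param) {}

      mace::MaceStatus Compute(const mace::OpContext *context,
                               const mace::Tensor *input,
                               const mace::Tensor *filter,
                               mace::Tensor *output) override {
        // ... NEON-specific 3x3 convolution kernel ...
        return mace::MaceStatus::MACE_SUCCESS;
      }
    };

    // The class is then registered with the op delegator registry under the key
    // produced by MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1), which
    // is how delegator::Conv2d::Create() resolves it at run time.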
+ + +#ifndef MACE_OPS_DELEGATOR_DECONV_2D_H_ +#define MACE_OPS_DELEGATOR_DECONV_2D_H_ + +#include + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { + +enum DeconvType { + General, + K2x2S1, + K2x2S2, + K3x3S1, + K3x3S2, + K4x4S1, + K4x4S2, +}; + +namespace delegator { + +struct Deconv2dParam : public DelegatorParam { + explicit Deconv2dParam(const std::vector &strides, + const std::vector &dilations, + const std::vector &paddings, + const Padding padding_type, + const FrameworkType framework_type, + const int group = 1) + : strides_(strides), dilations_(dilations), + paddings_(paddings), padding_type_(padding_type), + framework_type_(framework_type), + group_(group) {} + + const std::vector &strides_; + const std::vector &dilations_; + const std::vector &paddings_; + const Padding padding_type_; + const FrameworkType framework_type_; + const int group_; +}; + +class Deconv2d : public OpDelegator { + public: + explicit Deconv2d(const Deconv2dParam ¶m) + : OpDelegator(param), + strides_(param.strides_), + dilations_(param.dilations_), + paddings_(param.paddings_), + padding_type_(param.padding_type_), + framework_type_(param.framework_type_), + group_(param.group_) {} + + virtual ~Deconv2d() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Deconv2d) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *filter, + const Tensor *output_shape, + Tensor *output) = 0; + + protected: + const std::vector strides_; + const std::vector dilations_; + const std::vector paddings_; + const Padding padding_type_; + const FrameworkType framework_type_; + const int group_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_DECONV_2D_H_ + diff --git a/mace/ops/ref/bias_add.h b/mace/ops/delegator/depthwise_conv_2d.h similarity index 52% rename from mace/ops/ref/bias_add.h rename to mace/ops/delegator/depthwise_conv_2d.h index f3dc6096..c586839b 100644 --- a/mace/ops/ref/bias_add.h +++ b/mace/ops/delegator/depthwise_conv_2d.h @@ -1,4 +1,4 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,35 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. 
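One detail of the Deconv2dParam constructor above is worth noting: the callers in this patch pass the constant {1, 1} vectors (kDeconv2dStrides / kDepthwiseStrides) as the dilations argument, since the deconvolution kernels here run with unit dilation. Annotated and abridged from deconv_2d.cc earlier in this patch:

    delegator::Deconv2dParam param(strides_,          // strides_
                                   kDeconv2dStrides,  // dilations_, fixed to {1, 1}
                                   paddings_,
                                   padding_type_,
                                   model_type_);      // framework_type_; group_ defaults to 1
    deconv2d_delegator_ =
        delegator::Deconv2d::Create(context->workspace(), tag, param);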
-#ifndef MACE_OPS_REF_BIAS_ADD_H_ -#define MACE_OPS_REF_BIAS_ADD_H_ -#include "mace/core/op_context.h" +#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ +#define MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ + +#include "mace/ops/delegator/conv_2d.h" namespace mace { namespace ops { -namespace ref { - -class BiasAdd { - public: - BiasAdd() = default; - ~BiasAdd() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); - - private: - void AddBias(const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); -}; - -} // namespace ref +namespace delegator { + +typedef Conv2dParam DepthwiseConv2dParam; +typedef Conv2d DepthwiseConv2d; + +} // namespace delegator } // namespace ops } // namespace mace -#endif // MACE_OPS_REF_BIAS_ADD_H_ +#endif // MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ + diff --git a/mace/ops/delegator/depthwise_deconv_2d.h b/mace/ops/delegator/depthwise_deconv_2d.h new file mode 100644 index 00000000..10616f1b --- /dev/null +++ b/mace/ops/delegator/depthwise_deconv_2d.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ +#define MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ + +#include "mace/ops/delegator/deconv_2d.h" +namespace mace { +namespace ops { +namespace delegator { + +typedef Deconv2dParam DepthwiseDeconv2dParam; +typedef Deconv2dParam GroupDeconv2dParam; + +typedef Deconv2d DepthwiseDeconv2d; +typedef Deconv2d GroupDeconv2d; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ + diff --git a/mace/ops/delegator/eltwise.h b/mace/ops/delegator/eltwise.h new file mode 100644 index 00000000..fe66f354 --- /dev/null +++ b/mace/ops/delegator/eltwise.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
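Because DepthwiseConv2d and DepthwiseDeconv2d are plain typedefs of Conv2d and Deconv2d, the depthwise ops reuse the same Param types and differ only in the registry key they look up. Abridged from DepthwiseConv2dOp later in this patch:

    std::string tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float,
                                            REF, General);
    // ... narrowed to a NEON K3x3S1 / K3x3S2 key when the filter and strides match ...
    delegator::Conv2dParam param(strides_, dilations_, paddings_, padding_type_);
    depthwise_conv2d_delegator_ =
        delegator::DepthwiseConv2d::Create(context->workspace(), tag, param);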
+ +// This implements matrix-vector multiplication described as +// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt + +#ifndef MACE_OPS_DELEGATOR_ELTWISE_H_ +#define MACE_OPS_DELEGATOR_ELTWISE_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/core/types.h" +#include "mace/ops/common/eltwise_type.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct EltwiseParam : public DelegatorParam { + explicit EltwiseParam(EltwiseType type) + : type_(type) {} + + EltwiseType type_; +}; + +class Eltwise : public OpDelegator { + public: + explicit Eltwise(const EltwiseParam ¶m) : OpDelegator(param), + type_(param.type_) {} + virtual ~Eltwise() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Eltwise) + + virtual MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) = 0; + + protected: + EltwiseType type_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_ELTWISE_H_ diff --git a/mace/ops/delegator/gemm.h b/mace/ops/delegator/gemm.h new file mode 100644 index 00000000..29043c3b --- /dev/null +++ b/mace/ops/delegator/gemm.h @@ -0,0 +1,77 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
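Within this patch the Eltwise delegator is consumed by the quantized (uint8_t) EltwiseOp, which replaces its previous inline fixed-point fallback with a single delegator call; abridged from eltwise.cc below:

    eltwise_delegator_ = delegator::Eltwise::Create(
        context->workspace(),
        MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, MACE_CPU_IMPL_TYPE),
        delegator::EltwiseParam(type));  // type read from the "type" op argument
    // ... in Run(), after resizing the output:
    return eltwise_delegator_->Compute(context, input0, input1, output);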
+ + +#ifndef MACE_OPS_DELEGATOR_GEMM_H_ +#define MACE_OPS_DELEGATOR_GEMM_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/matrix.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct GemmParam : public DelegatorParam { + explicit GemmParam(const bool should_cache_pack = false) + : should_cache_pack_(should_cache_pack) {} + + const bool should_cache_pack_; +}; + +class Gemm : public OpDelegator { + public: + explicit Gemm(const GemmParam ¶m) : OpDelegator(param) {} + virtual ~Gemm() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Gemm) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const index_t batch, + const index_t rows, + const index_t cols, + const index_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; + // Original matrix before transpose has row-major + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const index_t batch, + const index_t lhs_rows, + const index_t lhs_cols, + const index_t rhs_rows, + const index_t rhs_cols, + const bool transpose_lhs, + const bool transpose_rhs, + const bool transpose_out, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_GEMM_H_ + diff --git a/mace/ops/delegator/gemv.h b/mace/ops/delegator/gemv.h new file mode 100644 index 00000000..4bdde182 --- /dev/null +++ b/mace/ops/delegator/gemv.h @@ -0,0 +1,52 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
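Gemm exposes two pure-virtual Compute() overloads: the first takes the resolved output geometry (rows, cols, depth) plus a MatrixMajor per operand, while the second, used by MatMulOp below, takes the raw lhs/rhs shapes plus transpose flags for row-major inputs. Abridged from matmul.cc later in this patch:

    gemm_->Compute(context, lhs, rhs, batch,
                   lhs_rows, lhs_cols, rhs_rows, rhs_cols,
                   transpose_a_, transpose_b_,
                   false,                        // transpose_out
                   lhs_batched, rhs_batched, C);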
+ + +#ifndef MACE_OPS_DELEGATOR_GEMV_H_ +#define MACE_OPS_DELEGATOR_GEMV_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { +namespace delegator { + +class Gemv : public OpDelegator { + public: + explicit Gemv(const DelegatorParam ¶m) : OpDelegator(param) {} + virtual ~Gemv() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Gemv) + + // Always row-major after transpose + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const Tensor *bias, + const index_t batch, + const index_t lhs_height, + const index_t lhs_width, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_GEMV_H_ + diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index 6efa4d24..9484fdde 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/depth_to_space.h" #endif // MACE_ENABLE_OPENCL @@ -184,7 +185,7 @@ class DepthToSpaceOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterDepthToSpace(OpRegistryBase *op_registry) { +void RegisterDepthToSpace(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DepthToSpace", DepthToSpaceOp, DeviceType::CPU, float); diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 06964ee0..23cf8e04 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -17,17 +17,6 @@ #include #include -#include "mace/ops/ref/depthwise_conv_2d.h" - -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/depthwise_conv_2d_3x3.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON - #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/arm/q8/quantization_util.h" // We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it @@ -36,9 +25,13 @@ #endif // MACE_ENABLE_QUANTIZE #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" #include "mace/ops/conv_pool_2d_base.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" #include "mace/public/mace.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" @@ -75,9 +68,16 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { public: explicit DepthwiseConv2dOp(OpConstructContext *context) : DepthwiseConv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -92,67 +92,44 @@ class 
DepthwiseConv2dOp : public DepthwiseConv2dOpBase { MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); -#ifdef MACE_ENABLE_NEON - const index_t filter_h = filter->dim(2); - const index_t filter_w = filter->dim(3); - const index_t stride_h = strides_[0]; - const index_t stride_w = strides_[1]; - const index_t dilation_h = dilations_[0]; - const index_t dilation_w = dilations_[1]; - - if (filter_h == 3 && filter_w == 3 && stride_h == 1 && stride_w == 1 - && dilation_h == 1 && dilation_w == 1) { - if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_ = - make_unique(paddings_, - padding_type_); - } - conv2d_delegator_->Compute(context, input, filter, output); - } else if (filter_h == 3 && filter_w == 3 && stride_h == 2 && stride_w == 2 - && dilation_h == 1 && dilation_w == 1) { - if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_ = - make_unique(paddings_, - padding_type_); - } - conv2d_delegator_->Compute(context, input, filter, output); - } else { - if (ref_conv2d_delegator_.get() == nullptr) { - ref_conv2d_delegator_ = - make_unique>(strides_, - dilations_, - paddings_, - padding_type_); + if (depthwise_conv2d_delegator_ == nullptr) { + std::string tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + REF, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t filter_h = filter->dim(2); + const index_t filter_w = filter->dim(3); + const index_t stride_h = strides_[0]; + const index_t stride_w = strides_[1]; + const index_t dilation_h = dilations_[0]; + const index_t dilation_w = dilations_[1]; + if (filter_h == 3 && filter_w == 3 && stride_h == 1 && stride_w == 1 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (filter_h == 3 && filter_w == 3 && stride_h == 2 + && stride_w == 2 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } } - ref_conv2d_delegator_->Compute(context, input, filter, output); - } -#else - if (ref_conv2d_delegator_.get() == nullptr) { - ref_conv2d_delegator_ = - make_unique>(strides_, - dilations_, - paddings_, - padding_type_); + delegator::Conv2dParam param(strides_, dilations_, + paddings_, padding_type_); + depthwise_conv2d_delegator_ = delegator::DepthwiseConv2d::Create( + context->workspace(), tag, param); } - ref_conv2d_delegator_->Compute(context, input, filter, output); -#endif // MACE_ENABLE_NEON - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + depthwise_conv2d_delegator_->Compute(context, input, filter, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr conv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON - std::unique_ptr> ref_conv2d_delegator_; + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr depthwise_conv2d_delegator_; protected: MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); @@ -422,7 +399,7 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { +void RegisterDepthwiseConv2d(OpRegistry *op_registry) { 
MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", DepthwiseConv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc index 96f6d575..f09261d6 100644 --- a/mace/ops/depthwise_deconv2d.cc +++ b/mace/ops/depthwise_deconv2d.cc @@ -12,33 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/deconv_2d.h" - -#if defined(MACE_ENABLE_NEON) -#include -#include "mace/ops/arm/fp32/depthwise_deconv_2d_general.h" -#include "mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h" -#include "mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" - -#else -#include "mace/ops/ref/depthwise_deconv_2d.h" -#include "mace/ops/ref/bias_add.h" -#include "mace/ops/ref/activation.h" -#endif - #include #include #include #include #include "mace/core/future.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" -#include "mace/utils/math.h" +#include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/deconv_2d.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" #include "mace/public/mace.h" +#include "mace/utils/math.h" #include "mace/utils/memory.h" -#include "mace/ops/common/conv_pool_2d_util.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -48,6 +37,10 @@ namespace mace { namespace ops { +namespace { +const std::vector kDepthwiseStrides = {1, 1}; +} + template class DepthwiseDeconv2dOp; @@ -57,9 +50,16 @@ class DepthwiseDeconv2dOp public: explicit DepthwiseDeconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(0); @@ -74,113 +74,77 @@ class DepthwiseDeconv2dOp const index_t in_channels = input->dim(1); bool is_depthwise = group_ == in_channels; -#ifdef MACE_ENABLE_NEON - const index_t kernel_h = filter->dim(2); - const index_t kernel_w = filter->dim(3); - bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 2; - bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 2; - - if (deconv2d_delegator_ == nullptr) { - if (is_depthwise) { - if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else { - deconv2d_delegator_ 
= - make_unique( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - CAFFE); - } - } else { - if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); + if (depthwise_deconv2d_delegator_ == nullptr) { + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t kernel_h = filter->dim(2); + const index_t kernel_w = filter->dim(3); + bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 2; + + if (is_depthwise) { + std::string tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } + delegator::DepthwiseDeconv2dParam param(strides_, kDepthwiseStrides, + paddings_, padding_type_, + CAFFE, group_); + depthwise_deconv2d_delegator_ = delegator::DepthwiseDeconv2d::Create( + context->workspace(), tag, param); } else { - deconv2d_delegator_ = make_unique( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - group_, - CAFFE); + std::string tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } + delegator::GroupDeconv2dParam param(strides_, kDepthwiseStrides, + paddings_, padding_type_, + CAFFE, group_); + depthwise_deconv2d_delegator_ = delegator::GroupDeconv2d::Create( + context->workspace(), tag, param); } } } - deconv2d_delegator_->Compute(context, - input, - filter, - nullptr, - output); -#else - if (deconv2d_delegator_ == nullptr) { - if (is_depthwise) { - deconv2d_delegator_ = make_unique>( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - CAFFE); - } else { - deconv2d_delegator_ = make_unique>( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - group_, - CAFFE); - } - } - deconv2d_delegator_->Compute(context, - input, - filter, - nullptr, - output); -#endif - - bias_add_delegator_.Compute(context, output, bias, output); - 
activation_delegator_.Compute(context, output, output); + depthwise_deconv2d_delegator_->Compute(context, input, filter, + nullptr, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr deconv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - std::unique_ptr> deconv2d_delegator_; - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr depthwise_deconv2d_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -251,7 +215,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry) { +void RegisterDepthwiseDeconv2d(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DepthwiseDeconv2d", DepthwiseDeconv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/dynamic_lstm.cc b/mace/ops/dynamic_lstm.cc index fc226c08..014f23c0 100644 --- a/mace/ops/dynamic_lstm.cc +++ b/mace/ops/dynamic_lstm.cc @@ -35,14 +35,13 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/lstm.h" +#include "mace/ops/delegator/gemv.h" #ifdef MACE_ENABLE_NEON #include -#include "mace/ops/arm/fp32/gemv.h" -#else -#include "mace/ops/ref/gemv.h" #endif // MACE_ENABLE_NEON namespace mace { @@ -73,7 +72,11 @@ class DynamicLSTMOp : public Operation { cell_cache_indexes_( Operation::GetRepeatedArgs("cell_cache_indexes")), out_cache_indexes_( - Operation::GetRepeatedArgs("out_cache_indexes")) {} + Operation::GetRepeatedArgs("out_cache_indexes")), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, T, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} inline void Validate() { const Tensor *input = this->Input(0); @@ -93,7 +96,7 @@ class DynamicLSTMOp : public Operation { ") and prev_out_delay(", prev_out_delay_, ") should be less than zero."); MACE_CHECK(prev_cell_delay_ % subsample_factor_ == 0 && - prev_out_delay_ % subsample_factor_ == 0, + prev_out_delay_ % subsample_factor_ == 0, "prev_cell_delay(", prev_cell_delay_, ") and prev_out_delay(", prev_out_delay_, ") should be multiples of subsample_factor(", @@ -190,8 +193,8 @@ class DynamicLSTMOp : public Operation { const index_t affine_a_out_dim = weights_a->dim(0); const index_t affine_a_depth = weights_a->dim(1); MACE_CHECK(affine_a_in_dim == affine_a_depth) - << "affine_a's input_dim:" << affine_a_in_dim - << "!=" << "affine_a's weights' depth:" << affine_a_depth << std::endl; + << "affine_a's input_dim:" << affine_a_in_dim + << "!=" << "affine_a's weights' depth:" << affine_a_depth << std::endl; const index_t lstm_input_dim = affine_a_out_dim + prev_cell_dim_; const index_t lstm_cell_dim = lstm_input_dim / 5; @@ -202,15 +205,15 @@ class DynamicLSTMOp : public Operation { lstm_cell_dim, ")."); MACE_CHECK(lstm_params->dim(0) == 3 && params_stride == lstm_cell_dim && lstm_cell_dim == prev_cell_dim_) - << " lstm params rows: " << lstm_params->dim(0) - << " params_stride: " << params_stride - << " != " << " cell_dim: " << lstm_cell_dim << std::endl; + << " lstm params rows: " << lstm_params->dim(0) + << " params_stride: " << params_stride + << " != " << " cell_dim: " << lstm_cell_dim << std::endl; 
const index_t affine_b_out_dim = weights_b->dim(0); const index_t affine_b_depth = weights_b->dim(1); const index_t affine_b_in_dim = lstm_cell_dim; MACE_CHECK(affine_b_in_dim == affine_b_depth) - << "affine_b's input_dim:" << affine_b_in_dim - << "!=" << "affine_b's weights' depth:" << affine_b_depth << std::endl; + << "affine_b's input_dim:" << affine_b_in_dim + << "!=" << "affine_b's weights' depth:" << affine_b_depth << std::endl; const index_t output_dim = affine_b_out_dim; MACE_CHECK(prev_out_offset_ + prev_out_dim_ <= output_dim) @@ -316,16 +319,16 @@ class DynamicLSTMOp : public Operation { prev_out_buf_data + i % out_buf_chunk * prev_out_dim_, prev_out_dim_ * sizeof(float)); // Affine - gemv_.Compute(context, - weights_a, - &affine_a_in, - bias_a, - 1, - affine_a_out_dim, - affine_a_depth, - false, - false, - &affine_a_out); + gemv_->Compute(context, + weights_a, + &affine_a_in, + bias_a, + 1, + affine_a_out_dim, + affine_a_depth, + false, + false, + &affine_a_out); // Prepare LSTMNonlinear input and output pointer float *lstm_cell_ptr = prev_cell_buf_data + i % cell_buf_chunk * prev_cell_dim_; @@ -343,16 +346,16 @@ class DynamicLSTMOp : public Operation { affine_b_in_data); UpdateCell(curr_cell_ptr, prev_cell_dim_, scale_); // Affine - gemv_.Compute(context, - weights_b, - &affine_b_in, - bias_b, - 1, - affine_b_out_dim, - affine_b_depth, - false, - false, - &affine_b_out); + gemv_->Compute(context, + weights_b, + &affine_b_in, + bias_b, + 1, + affine_b_out_dim, + affine_b_depth, + false, + false, + &affine_b_out); // Output memcpy(output_ptr, affine_b_out_data, @@ -404,18 +407,13 @@ class DynamicLSTMOp : public Operation { std::vector forward_indexes_; std::vector cell_cache_indexes_; std::vector out_cache_indexes_; - -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemv gemv_; -#else - ref::Gemv gemv_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemv_; MACE_OP_INPUT_TAGS(INPUT, PREV_OUT, PREV_CELL, WEIGHTS_A, PARAMS, WEIGHTS_B); MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_CACHE, CELL_CACHE); }; -void RegisterDynamicLSTM(OpRegistryBase *op_registry) { +void RegisterDynamicLSTM(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DynamicLSTM", DynamicLSTMOp, DeviceType::CPU, float); } diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 7db7b6c0..e4d5a74b 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
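For readability, the positional arguments in the gemv_->Compute(...) calls above line up with the delegator::Gemv::Compute() signature introduced earlier in this patch as follows (annotation only; behaviour is unchanged):

    gemv_->Compute(context,
                   weights_a,         // lhs
                   &affine_a_in,      // rhs
                   bias_a,            // bias
                   1,                 // batch
                   affine_a_out_dim,  // lhs_height
                   affine_a_depth,    // lhs_width
                   false,             // lhs_batched
                   false,             // rhs_batched
                   &affine_a_out);    // output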
-#ifdef MACE_ENABLE_NEON #ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/arm/q8/eltwise.h" +#include "mace/ops/delegator/eltwise.h" #endif // MACE_ENABLE_QUANTIZE -#endif // MACE_ENABLE_NEON #include "mace/ops/eltwise.h" @@ -28,7 +26,8 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" @@ -1061,7 +1060,7 @@ class EltwiseOp : public Operation { }; #ifdef MACE_ENABLE_QUANTIZE -template <> +template<> class EltwiseOp : public Operation { public: explicit EltwiseOp(OpConstructContext *context) @@ -1071,12 +1070,15 @@ class EltwiseOp : public Operation { coeff_(Operation::GetRepeatedArgs("coeff")), scalar_input_(Operation::GetOptionalArg("scalar_input", 1.0)), scalar_input_index_(Operation::GetOptionalArg( - "scalar_input_index", 1)) -#ifdef MACE_ENABLE_NEON - , eltwise_(static_cast(Operation::GetOptionalArg( - "type", static_cast(ops::EltwiseType::NONE)))) -#endif - {} + "scalar_input_index", 1)), + eltwise_delegator_(delegator::Eltwise::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, MACE_CPU_IMPL_TYPE), + delegator::EltwiseParam( + static_cast( + Operation::GetOptionalArg( + "type", + static_cast(ops::EltwiseType::NONE)))))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -1092,77 +1094,7 @@ class EltwiseOp : public Operation { MACE_CHECK(output->scale() != 0); MACE_RETURN_IF_ERROR(output->Resize(input0->shape())); -#ifdef MACE_ENABLE_NEON - eltwise_.Compute(context, input0, input1, output); -#else - constexpr int left_shift = 20; - const double doubled_scale = 2 * std::max(input0->scale(), input1->scale()); - const double adjusted_input0_scale = input0->scale() / doubled_scale; - const double adjusted_input1_scale = input1->scale() / doubled_scale; - const double adjusted_output_scale = - doubled_scale / ((1 << left_shift) * output->scale()); - - int32_t input0_multiplier; - int32_t input1_multiplier; - int32_t output_multiplier; - int32_t input0_shift; - int32_t input1_shift; - int32_t output_shift; - QuantizeMultiplier(adjusted_input0_scale, - &input0_multiplier, - &input0_shift); - QuantizeMultiplier(adjusted_input1_scale, - &input1_multiplier, - &input1_shift); - QuantizeMultiplier(adjusted_output_scale, - &output_multiplier, - &output_shift); - - Tensor::MappingGuard input0_guard(input0); - Tensor::MappingGuard input1_guard(input1); - Tensor::MappingGuard output_guard(output); - - auto input0_ptr = input0->data(); - auto input1_ptr = input1->data(); - auto output_ptr = output->mutable_data(); - - utils::ThreadPool - &thread_pool = context->device()->cpu_runtime()->thread_pool(); - thread_pool.Compute1D([=](index_t start, index_t end, index_t step) { - for (index_t i = start; i < end; i += step) { - const int32_t offset_input0 = input0_ptr[i] - input0->zero_point(); - const int32_t offset_input1 = input1_ptr[i] - input1->zero_point(); - const int32_t shifted_input0 = offset_input0 * (1 << left_shift); - const int32_t shifted_input1 = offset_input1 * (1 << left_shift); - const int32_t multiplied_input0 = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input0, - input0_multiplier), - -input0_shift); - const int32_t multiplied_input1 = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input1, - input1_multiplier), - -input1_shift); - - int32_t res; - if (type_ == 
SUM) { - res = multiplied_input0 + multiplied_input1; - } else { - res = multiplied_input0 - multiplied_input1; - } - - const int32_t output_val = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(res, - output_multiplier), - -output_shift) + output->zero_point(); - output_ptr[i] = Saturate(output_val); - } - }, 0, output->size(), 1); -#endif // NEON - - return MaceStatus::MACE_SUCCESS; + return eltwise_delegator_->Compute(context, input0, input1, output); } private: @@ -1171,9 +1103,7 @@ class EltwiseOp : public Operation { float scalar_input_; int32_t scalar_input_index_; Tensor scalar_tensor_; -#ifdef MACE_ENABLE_NEON - arm::q8::Eltwise eltwise_; -#endif + std::unique_ptr eltwise_delegator_; }; #endif // MACE_ENABLE_QUANTIZE @@ -1244,7 +1174,7 @@ class EltwiseOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterEltwise(OpRegistryBase *op_registry) { +void RegisterEltwise(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, DeviceType::CPU, float); diff --git a/mace/ops/expand_dims.cc b/mace/ops/expand_dims.cc index 5474dd4b..cc3426c3 100644 --- a/mace/ops/expand_dims.cc +++ b/mace/ops/expand_dims.cc @@ -13,7 +13,8 @@ // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -53,7 +54,7 @@ class ExpandDimsOp : public Operation { int axis_; }; -void RegisterExpandDims(OpRegistryBase *op_registry) { +void RegisterExpandDims(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, DeviceType::CPU, float); diff --git a/mace/ops/extract_pooling.cc b/mace/ops/extract_pooling.cc index 87264f4f..765fc58e 100644 --- a/mace/ops/extract_pooling.cc +++ b/mace/ops/extract_pooling.cc @@ -26,7 +26,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { @@ -176,7 +177,7 @@ class ExtractPoolingOp : public Operation { std::vector counts_; }; -void RegisterExtractPooling(OpRegistryBase *op_registry) { +void RegisterExtractPooling(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ExtractPooling", ExtractPoolingOp, DeviceType::CPU, float); } diff --git a/mace/ops/fill.cc b/mace/ops/fill.cc index 32a8595d..0917674b 100644 --- a/mace/ops/fill.cc +++ b/mace/ops/fill.cc @@ -13,7 +13,8 @@ // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -61,7 +62,7 @@ class FillOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterFill(OpRegistryBase *op_registry) { +void RegisterFill(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Fill", FillOp, DeviceType::CPU, float); } diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index d863a284..b0374888 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -17,22 +17,12 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" - -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/gemv.h" -#include "mace/ops/arm/fp32/activation.h" - -#ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/arm/q8/gemv.h" -#endif // MACE_ENABLE_QUANTIZE - -#else -#include "mace/ops/ref/gemv.h" -#include "mace/ops/ref/activation.h" -#endif // MACE_ENABLE_NEON +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/gemv.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -71,9 +61,16 @@ class FullyConnectedOp : public FullyConnectedOpBase { public: explicit FullyConnectedOp(OpConstructContext *context) : FullyConnectedOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_(delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, + relux_max_limit_, + leakyrelu_coefficient_))), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -100,30 +97,25 @@ class FullyConnectedOp : public FullyConnectedOpBase { const index_t input_size = weight->dim(1) * weight->dim(2) * weight->dim(3); const index_t output_size = weight->dim(0); - gemv_.Compute(context, - weight, - input, - bias, - batch, - output_size, - input_size, - false, - true, - output); + gemv_->Compute(context, + weight, + input, + bias, + batch, + output_size, + input_size, + false, + true, + output); - activation_delegator_.Compute(context, output, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemv gemv_; - arm::fp32::Activation activation_delegator_; -#else - ref::Gemv gemv_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr gemv_; }; #ifdef MACE_ENABLE_QUANTIZE @@ -132,7 +124,11 @@ class FullyConnectedOp : public FullyConnectedOpBase { public: explicit FullyConnectedOp(OpConstructContext *context) - : FullyConnectedOpBase(context) {} + : FullyConnectedOpBase(context), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(INPUT); @@ -161,7 +157,7 @@ class FullyConnectedOp const int input_size = static_cast(weight->dim(1) * weight->dim(2) * weight->dim(3)); const int output_size = static_cast(weight->dim(0)); - gemv_.Compute(context, + gemv_->Compute(context, weight, input, 
bias, @@ -175,11 +171,7 @@ class FullyConnectedOp } private: -#ifdef MACE_ENABLE_NEON - ::mace::ops::arm::q8::Gemv gemv_; -#else - ref::Gemv gemv_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemv_; }; #endif // MACE_ENABLE_QUANTIZE @@ -231,7 +223,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterFullyConnected(OpRegistryBase *op_registry) { +void RegisterFullyConnected(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "FullyConnected", FullyConnectedOp, DeviceType::CPU, float); diff --git a/mace/ops/gather.cc b/mace/ops/gather.cc index 2114290b..a112d91f 100644 --- a/mace/ops/gather.cc +++ b/mace/ops/gather.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -85,7 +86,7 @@ class GatherOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterGather(OpRegistryBase *op_registry) { +void RegisterGather(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Gather", GatherOp, DeviceType::CPU, float); diff --git a/mace/ops/identity.cc b/mace/ops/identity.cc index 1c7a037e..ac915cd8 100644 --- a/mace/ops/identity.cc +++ b/mace/ops/identity.cc @@ -13,7 +13,8 @@ // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -32,7 +33,7 @@ class IdentityOp : public Operation { } }; -void RegisterIdentity(OpRegistryBase *op_registry) { +void RegisterIdentity(OpRegistry *op_registry) { MACE_REGISTER_OP_BY_CLASS(op_registry, "Identity", IdentityOp, DeviceType::CPU, float); MACE_REGISTER_OP_BY_CLASS(op_registry, "Identity", IdentityOp, diff --git a/mace/ops/ifdefined.cc b/mace/ops/ifdefined.cc index f0367d20..84a28316 100644 --- a/mace/ops/ifdefined.cc +++ b/mace/ops/ifdefined.cc @@ -25,7 +25,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -162,7 +163,7 @@ class IfDefinedOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterIfDefined(OpRegistryBase *op_registry) { +void RegisterIfDefined(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "IfDefined", IfDefinedOp, DeviceType::CPU, float); } diff --git a/mace/ops/infer_conv2d_shape.cc b/mace/ops/infer_conv2d_shape.cc index fb7bfecc..f29056fe 100644 --- a/mace/ops/infer_conv2d_shape.cc +++ b/mace/ops/infer_conv2d_shape.cc @@ -13,7 +13,8 @@ // limitations under the License. 
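Every Register* entry point touched by this patch changes its parameter type from OpRegistryBase* to OpRegistry*; the MACE_REGISTER_OP / MACE_REGISTER_GPU_OP call sites themselves are unchanged. For a hypothetical new op the pattern now reads:

    void RegisterFoo(OpRegistry *op_registry) {
      MACE_REGISTER_OP(op_registry, "Foo", FooOp, DeviceType::CPU, float);
      MACE_REGISTER_GPU_OP(op_registry, "Foo", FooOp);  // only when OpenCL kernels exist
    }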
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/conv_pool_2d_util.h" namespace mace { @@ -101,7 +102,7 @@ class InferConv2dShapeOp : public Operation { } }; -void RegisterInferConv2dShape(OpRegistryBase *op_registry) { +void RegisterInferConv2dShape(OpRegistry *op_registry) { MACE_REGISTER_OP_BY_CLASS(op_registry, "InferConv2dShape", InferConv2dShapeOp, DeviceType::CPU, float); MACE_REGISTER_OP_BY_CLASS(op_registry, "InferConv2dShape", diff --git a/mace/ops/kaldi_batch_norm.cc b/mace/ops/kaldi_batch_norm.cc index 61c0340c..ed05064f 100644 --- a/mace/ops/kaldi_batch_norm.cc +++ b/mace/ops/kaldi_batch_norm.cc @@ -19,7 +19,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -167,7 +168,7 @@ class KaldiBatchNormOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterKaldiBatchNorm(OpRegistryBase *op_registry) { +void RegisterKaldiBatchNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "KaldiBatchNorm", KaldiBatchNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/local_response_norm.cc b/mace/ops/local_response_norm.cc index 022ee3e7..2ade126c 100644 --- a/mace/ops/local_response_norm.cc +++ b/mace/ops/local_response_norm.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -91,7 +92,7 @@ class LocalResponseNormOp : public Operation { float beta_; }; -void RegisterLocalResponseNorm(OpRegistryBase *op_registry) { +void RegisterLocalResponseNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LocalResponseNorm", LocalResponseNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/lpnorm.cc b/mace/ops/lpnorm.cc index 2c62ac19..a5c68a35 100644 --- a/mace/ops/lpnorm.cc +++ b/mace/ops/lpnorm.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/lpnorm.h" @@ -147,7 +148,7 @@ class LpNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterLpNorm(OpRegistryBase *op_registry) { +void RegisterLpNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LpNorm", LpNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "LpNorm", LpNormOp); diff --git a/mace/ops/lstm_nonlinear.cc b/mace/ops/lstm_nonlinear.cc index fbf92c16..c975ae62 100644 --- a/mace/ops/lstm_nonlinear.cc +++ b/mace/ops/lstm_nonlinear.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/lstm.h" namespace mace { @@ -100,7 +101,7 @@ class LSTMNonlinearOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterLSTMNonlinear(OpRegistryBase *op_registry) { +void RegisterLSTMNonlinear(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LSTMNonlinear", LSTMNonlinearOp, DeviceType::CPU, float); } diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 1c97279e..75e27870 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -19,25 +19,18 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" +#include 
"mace/ops/delegator/gemm.h" +#include "mace/ops/delegator/gemv.h" #include "mace/utils/math.h" -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/gemm.h" -#include "mace/ops/arm/fp32/gemv.h" - #ifdef MACE_ENABLE_QUANTIZE +#include "mace/ops/common/gemmlowp_util.h" +#ifdef MACE_ENABLE_NEON #include "mace/ops/arm/q8/gemv.h" -#endif // MACE_ENABLE_QUANTIZE - -#else -#include "mace/ops/ref/gemm.h" -#include "mace/ops/ref/gemv.h" #endif // MACE_ENABLE_NEON - -#ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/common/gemmlowp_util.h" #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL @@ -103,7 +96,15 @@ template<> class MatMulOp : public MatMulOpBase { public: explicit MatMulOp(OpConstructContext *context) - : MatMulOpBase(context) {} + : MatMulOpBase(context), + gemm_(delegator::Gemm::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemm, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::GemmParam())), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { Validate(); @@ -154,43 +155,43 @@ class MatMulOp : public MatMulOpBase { MACE_RETURN_IF_ERROR(C->Resize(output_shape)); if (rows == 1 && transpose_b_) { - return gemv_.Compute(context, - rhs, - lhs, - bias, - batch, - cols, - depth, - rhs_batched, - lhs_batched, - C); + return gemv_->Compute(context, + rhs, + lhs, + bias, + batch, + cols, + depth, + rhs_batched, + lhs_batched, + C); } else if (cols == 1 && !transpose_a_) { - return gemv_.Compute(context, - lhs, - rhs, - bias, - batch, - rows, - depth, - lhs_batched, - rhs_batched, - C); + return gemv_->Compute(context, + lhs, + rhs, + bias, + batch, + rows, + depth, + lhs_batched, + rhs_batched, + C); } else { context->device()->scratch_buffer()->Rewind(); - MaceStatus ret = gemm_.Compute(context, - lhs, - rhs, - batch, - lhs_rows, - lhs_cols, - rhs_rows, - rhs_cols, - transpose_a_, - transpose_b_, - false, - lhs_batched, - rhs_batched, - C); + MaceStatus ret = gemm_->Compute(context, + lhs, + rhs, + batch, + lhs_rows, + lhs_cols, + rhs_rows, + rhs_cols, + transpose_a_, + transpose_b_, + false, + lhs_batched, + rhs_batched, + C); if (bias != nullptr) { MACE_CHECK(bias->dim_size() == 1 && bias->dim(0) == cols, "bias' dim should be <= 2."); @@ -217,13 +218,8 @@ class MatMulOp : public MatMulOpBase { } private: -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemm gemm_; - arm::fp32::Gemv gemv_; -#else - ref::Gemv gemv_; - ref::Gemm gemm_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemm_; + std::unique_ptr gemv_; }; #ifdef MACE_ENABLE_QUANTIZE @@ -234,6 +230,10 @@ class MatMulFixpointImpl; template class MatMulFixpointImpl { public: +#ifdef MACE_ENABLE_NEON + MatMulFixpointImpl() + : gemv_kernel_(DelegatorParam()) {} +#endif // MACE_ENABLE_NEON void operator()(OpContext *context, const Tensor *A, const Tensor *B, @@ -318,6 +318,10 @@ class MatMulFixpointImpl { template class MatMulFixpointImpl { public: +#ifdef MACE_ENABLE_NEON + MatMulFixpointImpl() + : gemv_kernel_(DelegatorParam()) {} +#endif // MACE_ENABLE_NEON void operator()(OpContext *context, const Tensor *A, const Tensor *B, @@ -592,7 +596,7 @@ class MatMulOp : public MatMulOpBase { }; #endif // MACE_ENABLE_FP16_NEON -void RegisterMatMul(OpRegistryBase *op_registry) { +void RegisterMatMul(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float); diff --git a/mace/ops/mvnorm.cc b/mace/ops/mvnorm.cc index ccb0018a..09d3bb9a 100644 --- a/mace/ops/mvnorm.cc +++ 
b/mace/ops/mvnorm.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/mvnorm.h" @@ -165,7 +166,7 @@ class MVNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterMVNorm(OpRegistryBase *op_registry) { +void RegisterMVNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "MVNorm", MVNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "MVNorm", MVNormOp); diff --git a/mace/ops/one_hot.cc b/mace/ops/one_hot.cc index 1596286a..77d18bca 100644 --- a/mace/ops/one_hot.cc +++ b/mace/ops/one_hot.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -144,7 +145,7 @@ class OneHotOp : public OneHotOpBase { }; -void RegisterOneHot(OpRegistryBase *op_registry) { +void RegisterOneHot(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float); } diff --git a/mace/ops/opencl/buffer/buffer_transform.h b/mace/ops/opencl/buffer/buffer_transform.h index 25415877..5b47bdc7 100644 --- a/mace/ops/opencl/buffer/buffer_transform.h +++ b/mace/ops/opencl/buffer/buffer_transform.h @@ -19,7 +19,7 @@ #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/buffer_type_transform.cc b/mace/ops/opencl/buffer/buffer_type_transform.cc index 688ded66..e86c4608 100644 --- a/mace/ops/opencl/buffer/buffer_type_transform.cc +++ b/mace/ops/opencl/buffer/buffer_type_transform.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc index 95c85b17..001c201d 100644 --- a/mace/ops/opencl/buffer/conv_2d_1x1.cc +++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc index 4c03ee2a..9e7d7508 100644 --- a/mace/ops/opencl/buffer/conv_2d_general.cc +++ b/mace/ops/opencl/buffer/conv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
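The MatMulOp change above is the heart of the new delegator mechanism: instead of holding an arm::fp32::Gemm or ref::Gemm member chosen at compile time by MACE_ENABLE_NEON, the op keeps a std::unique_ptr to the delegator::Gemm interface and asks the workspace registry for an implementation selected by MACE_DELEGATOR_KEY when the op is constructed. A minimal sketch of the pattern, assuming the interfaces declared in mace/ops/delegator/gemm.h (MyGemmOp is a hypothetical name, not part of this patch):

    #include <memory>

    #include "mace/core/ops/operator.h"
    #include "mace/ops/delegator/gemm.h"

    namespace mace {
    namespace ops {

    class MyGemmOp : public Operation {
     public:
      explicit MyGemmOp(OpConstructContext *context)
          : Operation(context),
            // The key encodes delegator name, device, data type and impl type
            // (NEON vs. REF), so the best registered kernel is picked at runtime.
            gemm_(delegator::Gemm::Create(
                context->workspace(),
                MACE_DELEGATOR_KEY(Gemm, CPU, float, MACE_CPU_IMPL_TYPE),
                delegator::GemmParam())) {}

      MaceStatus Run(OpContext *context) override {
        // gemm_->Compute(context, ...) replaces the former direct member call.
        return MaceStatus::MACE_SUCCESS;
      }

     private:
      std::unique_ptr<delegator::Gemm> gemm_;
    };

    }  // namespace ops
    }  // namespace mace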
-#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/reshape.cc b/mace/ops/opencl/buffer/reshape.cc index ae3c119c..73f78777 100644 --- a/mace/ops/opencl/buffer/reshape.cc +++ b/mace/ops/opencl/buffer/reshape.cc @@ -16,7 +16,7 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" namespace mace { namespace ops { diff --git a/mace/ops/opencl/buffer/softmax.h b/mace/ops/opencl/buffer/softmax.h index 0acae465..5555ad61 100644 --- a/mace/ops/opencl/buffer/softmax.h +++ b/mace/ops/opencl/buffer/softmax.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/utils.h b/mace/ops/opencl/buffer/utils.h index e68fcb4a..10d0dea1 100644 --- a/mace/ops/opencl/buffer/utils.h +++ b/mace/ops/opencl/buffer/utils.h @@ -16,7 +16,7 @@ #define MACE_OPS_OPENCL_BUFFER_UTILS_H_ #include "mace/core/future.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" diff --git a/mace/ops/opencl/buffer_transform.cc b/mace/ops/opencl/buffer_transform.cc index fc1d9dcc..1cacaccb 100644 --- a/mace/ops/opencl/buffer_transform.cc +++ b/mace/ops/opencl/buffer_transform.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/buffer_transformer.h" namespace mace { @@ -51,7 +52,7 @@ class BufferTransformOp : public Operation { MemoryType out_mem_type_; }; -void RegisterBufferTransform(OpRegistryBase *op_registry) { +void RegisterBufferTransform(OpRegistry *op_registry) { MACE_REGISTER_GPU_OP(op_registry, "BufferTransform", BufferTransformOp); } diff --git a/mace/ops/opencl/buffer_transformer.h b/mace/ops/opencl/buffer_transformer.h index 0dcec529..987507de 100644 --- a/mace/ops/opencl/buffer_transformer.h +++ b/mace/ops/opencl/buffer_transformer.h @@ -19,7 +19,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/image/buffer_to_image.h" #include "mace/ops/opencl/image/image_to_buffer.h" #include "mace/ops/opencl/buffer/buffer_transform.h" diff --git a/mace/ops/opencl/image/activation.h b/mace/ops/opencl/image/activation.h index 929d267d..bfbdc47c 100644 --- a/mace/ops/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/addn.h b/mace/ops/opencl/image/addn.h index 575dee22..ee2c526b 100644 --- a/mace/ops/opencl/image/addn.h +++ b/mace/ops/opencl/image/addn.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h index 6b777368..9a93b534 100644 --- a/mace/ops/opencl/image/batch_norm.h +++ b/mace/ops/opencl/image/batch_norm.h @@ 
-21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/batch_to_space.h b/mace/ops/opencl/image/batch_to_space.h index a9d047aa..ade029b6 100644 --- a/mace/ops/opencl/image/batch_to_space.h +++ b/mace/ops/opencl/image/batch_to_space.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/bias_add.h b/mace/ops/opencl/image/bias_add.h index 67644d6a..3430c81f 100644 --- a/mace/ops/opencl/image/bias_add.h +++ b/mace/ops/opencl/image/bias_add.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/buffer_to_image.h b/mace/ops/opencl/image/buffer_to_image.h index 33891182..5a332f6a 100644 --- a/mace/ops/opencl/image/buffer_to_image.h +++ b/mace/ops/opencl/image/buffer_to_image.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/channel_shuffle.h b/mace/ops/opencl/image/channel_shuffle.h index 94448d80..016b60e0 100644 --- a/mace/ops/opencl/image/channel_shuffle.h +++ b/mace/ops/opencl/image/channel_shuffle.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/concat.h b/mace/ops/opencl/image/concat.h index e5cd2977..de9ee72f 100644 --- a/mace/ops/opencl/image/concat.h +++ b/mace/ops/opencl/image/concat.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d.h b/mace/ops/opencl/image/conv_2d.h index 6044c1a7..1ecd9131 100644 --- a/mace/ops/opencl/image/conv_2d.h +++ b/mace/ops/opencl/image/conv_2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc index 494672a4..2d4baa5b 100644 --- a/mace/ops/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc index 8bfc988c..c5ea2890 100644 --- a/mace/ops/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc index 9964c5f2..b84d8394 100644 --- a/mace/ops/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/common/activation_type.h" diff --git a/mace/ops/opencl/image/crop.h b/mace/ops/opencl/image/crop.h index 33a5d260..c6b9ca8a 100644 --- a/mace/ops/opencl/image/crop.h +++ b/mace/ops/opencl/image/crop.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h index 4f1db7e6..2ab38504 100644 --- a/mace/ops/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depth_to_space.h b/mace/ops/opencl/image/depth_to_space.h index 383a4c6f..ee56b6ea 100644 --- a/mace/ops/opencl/image/depth_to_space.h +++ b/mace/ops/opencl/image/depth_to_space.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depthwise_conv2d.h b/mace/ops/opencl/image/depthwise_conv2d.h index c72170ac..fc8833dd 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.h +++ b/mace/ops/opencl/image/depthwise_conv2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depthwise_deconv2d.h b/mace/ops/opencl/image/depthwise_deconv2d.h index fe039cb6..4643a9c1 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.h +++ b/mace/ops/opencl/image/depthwise_deconv2d.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/eltwise.h b/mace/ops/opencl/image/eltwise.h index a9298cc6..38c3dfe6 100644 --- a/mace/ops/opencl/image/eltwise.h +++ b/mace/ops/opencl/image/eltwise.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/eltwise_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h index 010edcac..46a93a61 100644 --- a/mace/ops/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include 
"mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/image_to_buffer.h b/mace/ops/opencl/image/image_to_buffer.h index 5d5c5248..f6484e2f 100644 --- a/mace/ops/opencl/image/image_to_buffer.h +++ b/mace/ops/opencl/image/image_to_buffer.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/opencl/buffer_transform_kernel.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/lpnorm.h b/mace/ops/opencl/image/lpnorm.h index cac64112..d500c66d 100644 --- a/mace/ops/opencl/image/lpnorm.h +++ b/mace/ops/opencl/image/lpnorm.h @@ -14,7 +14,7 @@ #ifndef MACE_OPS_OPENCL_IMAGE_LPNORM_H_ #define MACE_OPS_OPENCL_IMAGE_LPNORM_H_ -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/lstm_cell.h b/mace/ops/opencl/image/lstm_cell.h index 998d8147..f224ba07 100644 --- a/mace/ops/opencl/image/lstm_cell.h +++ b/mace/ops/opencl/image/lstm_cell.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/matmul.h b/mace/ops/opencl/image/matmul.h index 8ee05239..f9e3125d 100644 --- a/mace/ops/opencl/image/matmul.h +++ b/mace/ops/opencl/image/matmul.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/mvnorm.h b/mace/ops/opencl/image/mvnorm.h index f6e609d2..5752167e 100644 --- a/mace/ops/opencl/image/mvnorm.h +++ b/mace/ops/opencl/image/mvnorm.h @@ -17,7 +17,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/pad.h b/mace/ops/opencl/image/pad.h index 3df88f34..6c04c7c8 100644 --- a/mace/ops/opencl/image/pad.h +++ b/mace/ops/opencl/image/pad.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/pad_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/pooling.h b/mace/ops/opencl/image/pooling.h index 5c0e14a5..8f0e0c06 100644 --- a/mace/ops/opencl/image/pooling.h +++ b/mace/ops/opencl/image/pooling.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/reduce.h b/mace/ops/opencl/image/reduce.h index 0dfb48b4..24e889d7 100644 --- a/mace/ops/opencl/image/reduce.h +++ b/mace/ops/opencl/image/reduce.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/common/reduce_type.h" diff --git a/mace/ops/opencl/image/reshape.h b/mace/ops/opencl/image/reshape.h index 60be5fe0..3ee6bf29 100644 --- 
a/mace/ops/opencl/image/reshape.h +++ b/mace/ops/opencl/image/reshape.h @@ -20,7 +20,7 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/opencl/buffer_transform_kernel.h" diff --git a/mace/ops/opencl/image/resize_bicubic.h b/mace/ops/opencl/image/resize_bicubic.h index 5abc5539..aab81369 100644 --- a/mace/ops/opencl/image/resize_bicubic.h +++ b/mace/ops/opencl/image/resize_bicubic.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/resize_bilinear.h b/mace/ops/opencl/image/resize_bilinear.h index ca3602d3..a428a813 100644 --- a/mace/ops/opencl/image/resize_bilinear.h +++ b/mace/ops/opencl/image/resize_bilinear.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.h b/mace/ops/opencl/image/resize_nearest_neighbor.h index 8bb10d4b..1092665e 100644 --- a/mace/ops/opencl/image/resize_nearest_neighbor.h +++ b/mace/ops/opencl/image/resize_nearest_neighbor.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/softmax.h b/mace/ops/opencl/image/softmax.h index 525f1edc..1873cd16 100644 --- a/mace/ops/opencl/image/softmax.h +++ b/mace/ops/opencl/image/softmax.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/space_to_batch.h b/mace/ops/opencl/image/space_to_batch.h index 20777dc8..f1001b2f 100644 --- a/mace/ops/opencl/image/space_to_batch.h +++ b/mace/ops/opencl/image/space_to_batch.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/space_to_depth.h b/mace/ops/opencl/image/space_to_depth.h index 661e09af..6abb330f 100644 --- a/mace/ops/opencl/image/space_to_depth.h +++ b/mace/ops/opencl/image/space_to_depth.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h index 20e19362..0d2eaff2 100644 --- a/mace/ops/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/sqrdiff_mean.h b/mace/ops/opencl/image/sqrdiff_mean.h index 5acddb25..740fc036 100644 --- a/mace/ops/opencl/image/sqrdiff_mean.h +++ b/mace/ops/opencl/image/sqrdiff_mean.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git 
a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index fd7cdfe6..539b4cf4 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/common/activation_type.h" #include "mace/ops/common/conv_pool_2d_util.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/lstm_cell.cc b/mace/ops/opencl/lstm_cell.cc index ce45c844..dbdc2650 100644 --- a/mace/ops/opencl/lstm_cell.cc +++ b/mace/ops/opencl/lstm_cell.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/image/lstm_cell.h" #include "mace/utils/memory.h" @@ -89,7 +90,7 @@ class LSTMCellOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterLSTMCell(OpRegistryBase *op_registry) { +void RegisterLSTMCell(OpRegistry *op_registry) { MACE_REGISTER_GPU_OP(op_registry, "LSTMCell", LSTMCellOp); } diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index 49784c10..b210f40e 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/pad_type.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/pad.h" @@ -198,7 +199,7 @@ class PadOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterPad(OpRegistryBase *op_registry) { +void RegisterPad(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Pad", PadOp, DeviceType::CPU, float); diff --git a/mace/ops/pad_context.cc b/mace/ops/pad_context.cc index 25117df2..02a8c425 100644 --- a/mace/ops/pad_context.cc +++ b/mace/ops/pad_context.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -83,7 +84,7 @@ class PadContextOp : public Operation { int right_context_; }; -void RegisterPadContext(OpRegistryBase *op_registry) { +void RegisterPadContext(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PadContext", PadContextOp, DeviceType::CPU, float); } diff --git a/mace/ops/pnorm.cc b/mace/ops/pnorm.cc index 1d0d6698..588e5974 100644 --- a/mace/ops/pnorm.cc +++ b/mace/ops/pnorm.cc @@ -26,7 +26,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -132,7 +133,7 @@ class PNormOp : public Operation { int output_dim_; }; -void RegisterPNorm(OpRegistryBase *op_registry) { +void RegisterPNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PNorm", PNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 4d4247f2..2d51c1c4 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -22,7 +22,8 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/common/conv_pool_2d_util.h" @@ -510,7 +511,7 @@ class PoolingOp : public PoolingOpBase { }; 
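Every op touched by this patch gets the same registration-signature update: the function now takes OpRegistry instead of OpRegistryBase, and the include of mace/core/operator.h is replaced by mace/core/ops/operator.h plus mace/core/registry/ops_registry.h. For a new op the registration would look like the sketch below (MyOp is a placeholder; the macros are the ones used by Pad, PNorm and LSTMCell above):

    #include "mace/core/ops/operator.h"
    #include "mace/core/registry/ops_registry.h"

    namespace mace {
    namespace ops {

    void RegisterMyOp(OpRegistry *op_registry) {
      // CPU/float registration, as done for Pad, PadContext and PNorm.
      MACE_REGISTER_OP(op_registry, "MyOp", MyOp, DeviceType::CPU, float);
    #ifdef MACE_ENABLE_OPENCL
      // GPU registration uses the dedicated macro, as done for LSTMCell.
      MACE_REGISTER_GPU_OP(op_registry, "MyOp", MyOp);
    #endif  // MACE_ENABLE_OPENCL
    }

    }  // namespace ops
    }  // namespace mace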
#endif // MACE_ENABLE_OPENCL -void RegisterPooling(OpRegistryBase *op_registry) { +void RegisterPooling(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, DeviceType::CPU, float); diff --git a/mace/ops/prior_box.cc b/mace/ops/prior_box.cc index 62040d27..3598c98a 100644 --- a/mace/ops/prior_box.cc +++ b/mace/ops/prior_box.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -144,7 +145,7 @@ class PriorBoxOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterPriorBox(OpRegistryBase *op_registry) { +void RegisterPriorBox(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PriorBox", PriorBoxOp, DeviceType::CPU, float); } diff --git a/mace/ops/reduce.cc b/mace/ops/reduce.cc index 7c34db3e..a81a602d 100644 --- a/mace/ops/reduce.cc +++ b/mace/ops/reduce.cc @@ -19,7 +19,8 @@ #include "mace/ops/common/reduce_type.h" #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/runtime/cpu/cpu_runtime.h" #include "mace/core/tensor.h" #ifdef MACE_ENABLE_OPENCL @@ -1032,7 +1033,7 @@ class ReduceOp : public ReduceOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterReduce(OpRegistryBase *op_registry) { +void RegisterReduce(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp, diff --git a/mace/ops/ref/activation.cc b/mace/ops/ref/activation.cc index 4e2e65db..da2ff26f 100644 --- a/mace/ops/ref/activation.cc +++ b/mace/ops/ref/activation.cc @@ -13,18 +13,26 @@ // limitations under the License. #include -#include "mace/ops/ref/activation.h" + +#include "mace/ops/delegator/activation.h" namespace mace { namespace ops { namespace ref { -Activation::Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient) - : type_(type), - limit_(limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} +class Activation : public delegator::Activation { + public: + explicit Activation(const delegator::ActivationParam ¶m) + : delegator::Activation(param) {} + ~Activation() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + Tensor *output) override; + + private: + void DoActivation(const OpContext *context, const Tensor *input, + Tensor *output); +}; MaceStatus Activation::Compute(const OpContext *context, const Tensor *input, @@ -99,6 +107,9 @@ void Activation::DoActivation(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam, + MACE_DELEGATOR_KEY(Activation, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/activation.h b/mace/ops/ref/activation.h deleted file mode 100644 index 7ad986a5..00000000 --- a/mace/ops/ref/activation.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
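The ref::Activation rewrite above shows how each reference kernel is adapted to the delegator mechanism: the standalone class from the deleted header mace/ops/ref/activation.h now lives in the .cc file, derives from the delegator interface, takes its former constructor arguments through a param struct, and self-registers under a REF key. Stripped to its skeleton, with the Compute body elided, the pattern is roughly:

    #include "mace/ops/delegator/activation.h"

    namespace mace {
    namespace ops {
    namespace ref {

    class Activation : public delegator::Activation {
     public:
      explicit Activation(const delegator::ActivationParam &param)
          : delegator::Activation(param) {}
      ~Activation() = default;

      MaceStatus Compute(const OpContext *context, const Tensor *input,
                         Tensor *output) override;  // reference implementation
    };

    // Makes the class constructible through OpDelegatorRegistry lookups.
    MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam,
                            MACE_DELEGATOR_KEY(Activation, CPU, float, REF))

    }  // namespace ref
    }  // namespace ops
    }  // namespace mace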
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_REF_ACTIVATION_H_ -#define MACE_OPS_REF_ACTIVATION_H_ - -#include "mace/core/op_context.h" -#include "mace/ops/common/activation_type.h" - -namespace mace { -namespace ops { -namespace ref { - -class Activation { - public: - explicit Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient); - ~Activation() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - Tensor *output); - - private: - void DoActivation(const OpContext *context, - const Tensor *input, - Tensor *output); - - ActivationType type_; - const float limit_; - const float leakyrelu_coefficient_; -}; - -} // namespace ref -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_REF_ACTIVATION_H_ diff --git a/mace/ops/ref/bias_add.cc b/mace/ops/ref/bias_add.cc index efc56f74..221c2d2e 100644 --- a/mace/ops/ref/bias_add.cc +++ b/mace/ops/ref/bias_add.cc @@ -12,12 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/ref/bias_add.h" +#include "mace/ops/delegator/bias_add.h" namespace mace { namespace ops { namespace ref { +class BiasAdd : public delegator::BiasAdd { + public: + explicit BiasAdd(const DelegatorParam ¶m) : delegator::BiasAdd(param) {} + ~BiasAdd() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output) override; + + private: + void AddBias(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output); +}; + MaceStatus BiasAdd::Compute(const OpContext *context, const Tensor *input, const Tensor *bias, @@ -71,6 +84,9 @@ void BiasAdd::AddBias(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, BiasAdd, DelegatorParam, + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/conv_2d.cc b/mace/ops/ref/conv_2d.cc index 1c69ee9d..d90b7e2b 100644 --- a/mace/ops/ref/conv_2d.cc +++ b/mace/ops/ref/conv_2d.cc @@ -109,6 +109,10 @@ MaceStatus Conv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef Conv2d Conv2dRef; +MACE_REGISTER_DELEGATOR(registry, Conv2dRef, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/conv_2d.h b/mace/ops/ref/conv_2d.h index 9a9fbb8f..b241a58a 100644 --- a/mace/ops/ref/conv_2d.h +++ b/mace/ops/ref/conv_2d.h @@ -18,64 +18,41 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Conv2d { +class Conv2d : public delegator::Conv2d { public: - Conv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2d(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} ~Conv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; 
- const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; template<> -class Conv2d { +class Conv2d : public delegator::Conv2d { public: - Conv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2d(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} ~Conv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/deconv_2d.cc b/mace/ops/ref/deconv_2d.cc index d06c6634..d19a96d2 100644 --- a/mace/ops/ref/deconv_2d.cc +++ b/mace/ops/ref/deconv_2d.cc @@ -162,6 +162,11 @@ MaceStatus Deconv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef Deconv2d Deconv2dRef; +MACE_REGISTER_DELEGATOR( + registry, Deconv2dRef, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/deconv_2d.h b/mace/ops/ref/deconv_2d.h index a8ab6722..564ce7e7 100644 --- a/mace/ops/ref/deconv_2d.h +++ b/mace/ops/ref/deconv_2d.h @@ -18,28 +18,21 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Deconv2d { +class Deconv2d : public delegator::Deconv2d { public: - Deconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - framework_type_(framework_type) {} + explicit Deconv2d(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param) {} ~Deconv2d() = default; @@ -48,29 +41,14 @@ class Deconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const FrameworkType framework_type_; + Tensor *output) override; }; template<> -class Deconv2d { +class Deconv2d : public delegator::Deconv2d { public: - Deconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - framework_type_(framework_type) {} + explicit Deconv2d(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param) {} ~Deconv2d() = default; @@ -79,14 +57,7 @@ class Deconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const FrameworkType framework_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/depthwise_conv_2d.cc 
b/mace/ops/ref/depthwise_conv_2d.cc index bff95069..03be506c 100644 --- a/mace/ops/ref/depthwise_conv_2d.cc +++ b/mace/ops/ref/depthwise_conv_2d.cc @@ -115,6 +115,11 @@ MaceStatus DepthwiseConv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef DepthwiseConv2d DepthwiseConv2dRef; +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dRef, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/depthwise_conv_2d.h b/mace/ops/ref/depthwise_conv_2d.h index 91a95192..cc5a14ca 100644 --- a/mace/ops/ref/depthwise_conv_2d.h +++ b/mace/ops/ref/depthwise_conv_2d.h @@ -18,64 +18,41 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class DepthwiseConv2d { +class DepthwiseConv2d : public delegator::DepthwiseConv2d { public: - DepthwiseConv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit DepthwiseConv2d(const delegator::DepthwiseConv2dParam ¶m) + : delegator::DepthwiseConv2d(param) {} ~DepthwiseConv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; template<> -class DepthwiseConv2d { +class DepthwiseConv2d : public delegator::DepthwiseConv2d { public: - DepthwiseConv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit DepthwiseConv2d(const delegator::DepthwiseConv2dParam ¶m) + : delegator::DepthwiseConv2d(param) {} ~DepthwiseConv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/depthwise_deconv_2d.cc b/mace/ops/ref/depthwise_deconv_2d.cc index 63b3aa69..badded16 100644 --- a/mace/ops/ref/depthwise_deconv_2d.cc +++ b/mace/ops/ref/depthwise_deconv_2d.cc @@ -302,6 +302,11 @@ MaceStatus GroupDeconv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef DepthwiseDeconv2d DepthwiseDeconv2dRef; +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dRef, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/depthwise_deconv_2d.h b/mace/ops/ref/depthwise_deconv_2d.h index 5da74871..586f2627 100644 --- a/mace/ops/ref/depthwise_deconv_2d.h +++ b/mace/ops/ref/depthwise_deconv_2d.h @@ -18,63 +18,37 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" 
#include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class GroupDeconv2d { +class GroupDeconv2d : public delegator::GroupDeconv2d { public: - GroupDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} + explicit GroupDeconv2d(const delegator::GroupDeconv2dParam ¶m) + : delegator::GroupDeconv2d(param) {} virtual ~GroupDeconv2d() = default; - virtual MaceStatus Compute( + MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const index_t group_; - const FrameworkType framework_type_; + Tensor *output) override; }; template class DepthwiseDeconv2d : public GroupDeconv2d { public: - DepthwiseDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : GroupDeconv2d(strides, - dilations, - paddings, - padding_type, - 0, - framework_type) {} + explicit DepthwiseDeconv2d(const delegator::DepthwiseDeconv2d ¶m) + : GroupDeconv2d(param) {} ~DepthwiseDeconv2d() = default; @@ -83,57 +57,30 @@ class DepthwiseDeconv2d : public GroupDeconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); + Tensor *output) override; }; template<> -class GroupDeconv2d { +class GroupDeconv2d : public delegator::GroupDeconv2d { public: - GroupDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} + explicit GroupDeconv2d(const delegator::GroupDeconv2dParam ¶m) + : delegator::GroupDeconv2d(param) {} virtual ~GroupDeconv2d() = default; - virtual MaceStatus Compute( + MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - protected: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const index_t group_; - const FrameworkType framework_type_; + Tensor *output) override; }; template<> class DepthwiseDeconv2d : public GroupDeconv2d { public: - DepthwiseDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : GroupDeconv2d(strides, - dilations, - paddings, - padding_type, - 0, - framework_type) {} + explicit DepthwiseDeconv2d(const delegator::DepthwiseDeconv2dParam ¶m) + : GroupDeconv2d(param) {} ~DepthwiseDeconv2d() = default; @@ -142,7 +89,7 @@ class DepthwiseDeconv2d : public GroupDeconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/gemm.cc 
b/mace/ops/ref/gemm.cc index e9d13c91..956a7aff 100644 --- a/mace/ops/ref/gemm.cc +++ b/mace/ops/ref/gemm.cc @@ -111,6 +111,10 @@ MaceStatus Gemm::Compute(const OpContext *context, output); } +typedef Gemm GemmRef; +MACE_REGISTER_DELEGATOR(registry, GemmRef, delegator::GemmParam, + MACE_DELEGATOR_KEY(Gemm, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/gemm.h b/mace/ops/ref/gemm.h index bf1826ad..b7b63fba 100644 --- a/mace/ops/ref/gemm.h +++ b/mace/ops/ref/gemm.h @@ -16,19 +16,20 @@ #ifndef MACE_OPS_REF_GEMM_H_ #define MACE_OPS_REF_GEMM_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/matrix.h" +#include "mace/ops/delegator/gemm.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Gemm { +class Gemm : public delegator::Gemm { public: - Gemm() {} + explicit Gemm(const delegator::GemmParam ¶m) : delegator::Gemm(param) {} ~Gemm() {} MaceStatus Compute(const OpContext *context, const Tensor *lhs, @@ -42,13 +43,13 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemm { +class Gemm : public delegator::Gemm { public: - Gemm() {} + explicit Gemm(const delegator::GemmParam ¶m) : delegator::Gemm(param) {} ~Gemm() {} MaceStatus Compute(const OpContext *context, const Tensor *lhs, @@ -62,7 +63,7 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; // Original matrix before transpose has row-major MaceStatus Compute( const OpContext *context, @@ -78,7 +79,7 @@ class Gemm { const bool transpose_out, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/gemv.cc b/mace/ops/ref/gemv.cc index bf0366f3..350412c2 100644 --- a/mace/ops/ref/gemv.cc +++ b/mace/ops/ref/gemv.cc @@ -159,8 +159,16 @@ MaceStatus Gemv::Compute(const OpContext *context, } // b return MaceStatus::MACE_SUCCESS; } + +typedef Gemv GemvUint8Ref; +MACE_REGISTER_DELEGATOR(registry, GemvUint8Ref, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, Ref)) #endif // MACE_ENABLE_QUANTIZE +typedef Gemv GemvRef; +MACE_REGISTER_DELEGATOR(registry, GemvRef, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/gemv.h b/mace/ops/ref/gemv.h index 7116b8fa..e14730bb 100644 --- a/mace/ops/ref/gemv.h +++ b/mace/ops/ref/gemv.h @@ -16,18 +16,19 @@ #ifndef MACE_OPS_REF_GEMV_H_ #define MACE_OPS_REF_GEMV_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -40,13 +41,13 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : 
delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -59,14 +60,14 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; #if defined(MACE_ENABLE_QUANTIZE) template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -79,13 +80,13 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -98,7 +99,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; #endif // MACE_ENABLE_QUANTIZE diff --git a/mace/ops/ref/q8/eltwise.cc b/mace/ops/ref/q8/eltwise.cc new file mode 100644 index 00000000..220378e4 --- /dev/null +++ b/mace/ops/ref/q8/eltwise.cc @@ -0,0 +1,116 @@ +// Copyright 2019 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "mace/ops/common/gemmlowp_util.h" +#include "mace/ops/delegator/eltwise.h" +#include "mace/utils/logging.h" + +namespace mace { +namespace ops { +namespace ref { +namespace q8 { + +class Eltwise : public delegator::Eltwise { + public: + explicit Eltwise(const delegator::EltwiseParam ¶m) + : delegator::Eltwise(param) {} + ~Eltwise() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) override; +}; + +MaceStatus Eltwise::Compute(const OpContext *context, + const Tensor *input0, + const Tensor *input1, + Tensor *output) { + constexpr int left_shift = 20; + const double doubled_scale = 2 * std::max(input0->scale(), input1->scale()); + const double adjusted_input0_scale = input0->scale() / doubled_scale; + const double adjusted_input1_scale = input1->scale() / doubled_scale; + const double adjusted_output_scale = + doubled_scale / ((1 << left_shift) * output->scale()); + + int32_t input0_multiplier; + int32_t input1_multiplier; + int32_t output_multiplier; + int32_t input0_shift; + int32_t input1_shift; + int32_t output_shift; + QuantizeMultiplier(adjusted_input0_scale, + &input0_multiplier, + &input0_shift); + QuantizeMultiplier(adjusted_input1_scale, + &input1_multiplier, + &input1_shift); + QuantizeMultiplier(adjusted_output_scale, + &output_multiplier, + &output_shift); + + Tensor::MappingGuard input0_guard(input0); + Tensor::MappingGuard input1_guard(input1); + Tensor::MappingGuard output_guard(output); + + auto input0_ptr = input0->data(); + auto input1_ptr = input1->data(); + auto output_ptr = output->mutable_data(); + + utils::ThreadPool + &thread_pool = context->device()->cpu_runtime()->thread_pool(); + thread_pool.Compute1D([=](index_t start, index_t end, index_t step) { + for (index_t i = start; i < end; i += step) { + const int32_t offset_input0 = input0_ptr[i] - input0->zero_point(); + const int32_t offset_input1 = input1_ptr[i] - input1->zero_point(); + const int32_t shifted_input0 = offset_input0 * (1 << left_shift); + const int32_t shifted_input1 = offset_input1 * (1 << left_shift); + const int32_t multiplied_input0 = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input0, + input0_multiplier), + -input0_shift); + const int32_t multiplied_input1 = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input1, + input1_multiplier), + -input1_shift); + + int32_t res; + if (type_ == SUM) { + res = multiplied_input0 + multiplied_input1; + } else { + res = multiplied_input0 - multiplied_input1; + } + + const int32_t output_val = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(res, + output_multiplier), + -output_shift) + output->zero_point(); + output_ptr[i] = Saturate(output_val); + } + }, 0, output->size(), 1); + + return MaceStatus::MACE_SUCCESS; +} + +MACE_REGISTER_DELEGATOR(registry, Eltwise, delegator::EltwiseParam, + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, REF)) + +} // namespace q8 +} // namespace ref +} // namespace ops +} // namespace mace diff --git a/mace/ops/registry/op_delegators_registry.cc b/mace/ops/registry/op_delegators_registry.cc new file mode 100644 index 00000000..a5968780 --- /dev/null +++ b/mace/ops/registry/op_delegators_registry.cc @@ -0,0 +1,170 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
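Up to rounding, the quantized Eltwise delegator above computes the standard requantized add/sub. With scales s_i, zero points z_i and quantized values q_i, each output element is

    q_out = ((q_0 - z_0) * s_0 +/- (q_1 - z_1) * s_1) / s_out + z_out

The code realizes this in 32-bit fixed point: both inputs are first rescaled to the common scale 2 * max(s_0, s_1) after a left shift of 20 bits, the per-input multipliers and shifts come from QuantizeMultiplier, and the final multiplier 2 * max(s_0, s_1) / (2^20 * s_out) maps the sum (or difference) back to the output scale before the zero point is added and the value is saturated to uint8.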
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/ops/registry/registry.h" + +namespace mace { +namespace ops { + +namespace ref { +extern void RegisterActivationDelegator(OpDelegatorRegistry *registry); +extern void RegisterBiasAddDelegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dRefDelegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dRefDelegator(OpDelegatorRegistry *registry); +extern void RegisterDepthwiseConv2dRefDelegator(OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dRefDelegator( + OpDelegatorRegistry *registry); +extern void RegisterGemmRefDelegator(OpDelegatorRegistry *registry); +extern void RegisterGemvRefDelegator(OpDelegatorRegistry *registry); + +#ifdef MACE_ENABLE_QUANTIZE +namespace q8 { +extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry); +} // namespace q8 +extern void RegisterGemvUint8RefDelegator(OpDelegatorRegistry *registry); +#endif // MACE_ENABLE_QUANTIZE +} // namespace ref + +#ifdef MACE_ENABLE_NEON +namespace arm { +namespace fp32 { +extern void RegisterActivationDelegator(OpDelegatorRegistry *registry); +extern void RegisterBiasAddDelegator(OpDelegatorRegistry *registry); + +extern void RegisterConv2dK1x1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK1x7S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK7x1S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK1x15S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK15x1S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK3x3S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK3x3S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK3x3WinogradDelegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK5x5S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK7x7S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK7x7S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dK7x7S3Delegator(OpDelegatorRegistry *registry); +extern void RegisterConv2dGeneralDelegator(OpDelegatorRegistry *registry); + +extern void RegisterDeconv2dK2x2S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dK2x2S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dK3x3S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dK3x3S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dK4x4S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dK4x4S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterDeconv2dGeneralDelegator(OpDelegatorRegistry *registry); + +extern void RegisterDepthwiseConv2dK3x3S1Delegator( + OpDelegatorRegistry *registry); +extern void RegisterDepthwiseConv2dK3x3S2Delegator( + OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dK3x3S1Delegator( + OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dK3x3S2Delegator( + OpDelegatorRegistry *registry); +extern void RegisterGroupDeconv2dK3x3S1Delegator(OpDelegatorRegistry *registry); 
+extern void RegisterGroupDeconv2dK3x3S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dK4x4S1Delegator( + OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dK4x4S2Delegator( + OpDelegatorRegistry *registry); +extern void RegisterGroupDeconv2dK4x4S1Delegator(OpDelegatorRegistry *registry); +extern void RegisterGroupDeconv2dK4x4S2Delegator(OpDelegatorRegistry *registry); +extern void RegisterDepthwiseDeconv2dGeneralDelegator( + OpDelegatorRegistry *registry); +extern void RegisterGroupDeconv2dGeneralDelegator( + OpDelegatorRegistry *registry); + +extern void RegisterGemmDelegator(OpDelegatorRegistry *registry); +extern void RegisterGemvDelegator(OpDelegatorRegistry *registry); +} // namespace fp32 + +#ifdef MACE_ENABLE_QUANTIZE +namespace q8 { +extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry); +extern void RegisterGemvUint8Delegator(OpDelegatorRegistry *registry); +extern void RegisterGemvInt32Delegator(OpDelegatorRegistry *registry); +} // namespace q8 +#endif // MACE_ENABLE_QUANTIZE + +} // namespace arm +#endif // MACE_ENABLE_NEON + +void RegisterAllOpDelegators(OpDelegatorRegistry *registry) { + ref::RegisterActivationDelegator(registry); + ref::RegisterBiasAddDelegator(registry); + ref::RegisterConv2dRefDelegator(registry); + ref::RegisterDeconv2dRefDelegator(registry); + ref::RegisterDepthwiseConv2dRefDelegator(registry); + ref::RegisterDepthwiseDeconv2dRefDelegator(registry); + ref::RegisterGemmRefDelegator(registry); + ref::RegisterGemvRefDelegator(registry); + +#ifdef MACE_ENABLE_QUANTIZE + ref::q8::RegisterEltwiseDelegator(registry); + ref::RegisterGemvUint8RefDelegator(registry); +#endif // MACE_ENABLE_QUANTIZE + +#ifdef MACE_ENABLE_NEON + arm::fp32::RegisterActivationDelegator(registry); + arm::fp32::RegisterBiasAddDelegator(registry); + + arm::fp32::RegisterConv2dK1x1Delegator(registry); + arm::fp32::RegisterConv2dK1x7S1Delegator(registry); + arm::fp32::RegisterConv2dK7x1S1Delegator(registry); + arm::fp32::RegisterConv2dK1x15S1Delegator(registry); + arm::fp32::RegisterConv2dK15x1S1Delegator(registry); + arm::fp32::RegisterConv2dK3x3S1Delegator(registry); + arm::fp32::RegisterConv2dK3x3S2Delegator(registry); + arm::fp32::RegisterConv2dK3x3WinogradDelegator(registry); + arm::fp32::RegisterConv2dK5x5S1Delegator(registry); + arm::fp32::RegisterConv2dK7x7S1Delegator(registry); + arm::fp32::RegisterConv2dK7x7S2Delegator(registry); + arm::fp32::RegisterConv2dK7x7S3Delegator(registry); + arm::fp32::RegisterConv2dGeneralDelegator(registry); + + arm::fp32::RegisterDeconv2dK2x2S1Delegator(registry); + arm::fp32::RegisterDeconv2dK2x2S2Delegator(registry); + arm::fp32::RegisterDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDeconv2dK4x4S1Delegator(registry); + arm::fp32::RegisterDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterDeconv2dGeneralDelegator(registry); + + arm::fp32::RegisterDepthwiseConv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDepthwiseConv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK4x4S1Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK4x4S1Delegator(registry); + 
arm::fp32::RegisterGroupDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dGeneralDelegator(registry); + arm::fp32::RegisterGroupDeconv2dGeneralDelegator(registry); + + arm::fp32::RegisterGemmDelegator(registry); + arm::fp32::RegisterGemvDelegator(registry); + +#ifdef MACE_ENABLE_QUANTIZE + arm::q8::RegisterEltwiseDelegator(registry); + arm::q8::RegisterGemvUint8Delegator(registry); + arm::q8::RegisterGemvInt32Delegator(registry); +#endif // MACE_ENABLE_QUANTIZE + +#endif // MACE_ENABLE_NEON +} + +} // namespace ops +} // namespace mace diff --git a/mace/ops/registry/ops_registry.cc b/mace/ops/registry/ops_registry.cc index eafa78ce..2f6e8c73 100644 --- a/mace/ops/registry/ops_registry.cc +++ b/mace/ops/registry/ops_registry.cc @@ -1,4 +1,4 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,167 +12,167 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/registry/ops_registry.h" +#include "mace/ops/registry/registry.h" namespace mace { namespace ops { // Keep in lexicographical order -extern void RegisterActivation(OpRegistryBase *op_registry); -extern void RegisterAddN(OpRegistryBase *op_registry); -extern void RegisterArgMax(OpRegistryBase *op_registry); -extern void RegisterBatchNorm(OpRegistryBase *op_registry); -extern void RegisterBatchToSpaceND(OpRegistryBase *op_registry); -extern void RegisterBiasAdd(OpRegistryBase *op_registry); -extern void RegisterCast(OpRegistryBase *op_registry); -extern void RegisterChannelShuffle(OpRegistryBase *op_registry); -extern void RegisterConcat(OpRegistryBase *op_registry); -extern void RegisterConv2D(OpRegistryBase *op_registry); -extern void RegisterCrop(OpRegistryBase *op_registry); -extern void RegisterCumsum(OpRegistryBase *op_registry); -extern void RegisterDeconv2D(OpRegistryBase *op_registry); -extern void RegisterDepthToSpace(OpRegistryBase *op_registry); -extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry); -extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry); -extern void RegisterDynamicLSTM(OpRegistryBase *op_registry); -extern void RegisterEltwise(OpRegistryBase *op_registry); -extern void RegisterExpandDims(OpRegistryBase *op_registry); -extern void RegisterExtractPooling(OpRegistryBase *op_registry); -extern void RegisterFill(OpRegistryBase *op_registry); -extern void RegisterFullyConnected(OpRegistryBase *op_registry); -extern void RegisterGather(OpRegistryBase *op_registry); -extern void RegisterIdentity(OpRegistryBase *op_registry); -extern void RegisterIfDefined(OpRegistryBase *op_registry); -extern void RegisterInferConv2dShape(OpRegistryBase *op_registry); -extern void RegisterKaldiBatchNorm(OpRegistryBase *op_registry); -extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry); -extern void RegisterLpNorm(OpRegistryBase *op_registry); -extern void RegisterLSTMNonlinear(OpRegistryBase *op_registry); -extern void RegisterMatMul(OpRegistryBase *op_registry); -extern void RegisterMVNorm(OpRegistryBase *op_registry); -extern void RegisterOneHot(OpRegistryBase *op_registry); -extern void RegisterPad(OpRegistryBase *op_registry); -extern void RegisterPadContext(OpRegistryBase *op_registry); -extern void RegisterPNorm(OpRegistryBase *op_registry); -extern void RegisterPooling(OpRegistryBase 
*op_registry); -extern void RegisterReduce(OpRegistryBase *op_registry); -extern void RegisterReplaceIndex(OpRegistryBase *op_registry); -extern void RegisterPriorBox(OpRegistryBase *op_registry); -extern void RegisterReshape(OpRegistryBase *op_registry); -extern void RegisterResizeBicubic(OpRegistryBase *op_registry); -extern void RegisterResizeBilinear(OpRegistryBase *op_registry); -extern void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry); -extern void RegisterReverse(OpRegistryBase *op_registry); -extern void RegisterScalarMath(OpRegistryBase *op_registry); -extern void RegisterSelect(OpRegistryBase *op_registry); -extern void RegisterShape(OpRegistryBase *op_registry); -extern void RegisterSlice(OpRegistryBase *op_registry); -extern void RegisterSoftmax(OpRegistryBase *op_registry); -extern void RegisterSpaceToBatchND(OpRegistryBase *op_registry); -extern void RegisterSpaceToDepth(OpRegistryBase *op_registry); -extern void RegisterSplice(OpRegistryBase *op_registry); -extern void RegisterSplit(OpRegistryBase *op_registry); -extern void RegisterSqrDiffMean(OpRegistryBase *op_registry); -extern void RegisterSqueeze(OpRegistryBase *op_registry); -extern void RegisterStack(OpRegistryBase *op_registry); -extern void RegisterStridedSlice(OpRegistryBase *op_registry); -extern void RegisterSubsample(OpRegistryBase *op_registry); -extern void RegisterSumGroup(OpRegistryBase *op_registry); -extern void RegisterTargetRMSNorm(OpRegistryBase *op_registry); -extern void RegisterTile(OpRegistryBase *op_registry); -extern void RegisterTranspose(OpRegistryBase *op_registry); -extern void RegisterUnstack(OpRegistryBase *op_registry); -extern void RegisterUnsqueeze(OpRegistryBase *op_registry); +extern void RegisterActivation(OpRegistry *op_registry); +extern void RegisterAddN(OpRegistry *op_registry); +extern void RegisterArgMax(OpRegistry *op_registry); +extern void RegisterBatchNorm(OpRegistry *op_registry); +extern void RegisterBatchToSpaceND(OpRegistry *op_registry); +extern void RegisterBiasAdd(OpRegistry *op_registry); +extern void RegisterCast(OpRegistry *op_registry); +extern void RegisterChannelShuffle(OpRegistry *op_registry); +extern void RegisterConcat(OpRegistry *op_registry); +extern void RegisterConv2D(OpRegistry *op_registry); +extern void RegisterCrop(OpRegistry *op_registry); +extern void RegisterCumsum(OpRegistry *op_registry); +extern void RegisterDeconv2D(OpRegistry *op_registry); +extern void RegisterDepthToSpace(OpRegistry *op_registry); +extern void RegisterDepthwiseConv2d(OpRegistry *op_registry); +extern void RegisterDepthwiseDeconv2d(OpRegistry *op_registry); +extern void RegisterDynamicLSTM(OpRegistry *op_registry); +extern void RegisterEltwise(OpRegistry *op_registry); +extern void RegisterExpandDims(OpRegistry *op_registry); +extern void RegisterExtractPooling(OpRegistry *op_registry); +extern void RegisterFill(OpRegistry *op_registry); +extern void RegisterFullyConnected(OpRegistry *op_registry); +extern void RegisterGather(OpRegistry *op_registry); +extern void RegisterIdentity(OpRegistry *op_registry); +extern void RegisterIfDefined(OpRegistry *op_registry); +extern void RegisterInferConv2dShape(OpRegistry *op_registry); +extern void RegisterKaldiBatchNorm(OpRegistry *op_registry); +extern void RegisterLocalResponseNorm(OpRegistry *op_registry); +extern void RegisterLpNorm(OpRegistry *op_registry); +extern void RegisterLSTMNonlinear(OpRegistry *op_registry); +extern void RegisterMatMul(OpRegistry *op_registry); +extern void RegisterMVNorm(OpRegistry 
*op_registry); +extern void RegisterOneHot(OpRegistry *op_registry); +extern void RegisterPad(OpRegistry *op_registry); +extern void RegisterPadContext(OpRegistry *op_registry); +extern void RegisterPNorm(OpRegistry *op_registry); +extern void RegisterPooling(OpRegistry *op_registry); +extern void RegisterReduce(OpRegistry *op_registry); +extern void RegisterReplaceIndex(OpRegistry *op_registry); +extern void RegisterPriorBox(OpRegistry *op_registry); +extern void RegisterReshape(OpRegistry *op_registry); +extern void RegisterResizeBicubic(OpRegistry *op_registry); +extern void RegisterResizeBilinear(OpRegistry *op_registry); +extern void RegisterResizeNearestNeighbor(OpRegistry *op_registry); +extern void RegisterReverse(OpRegistry *op_registry); +extern void RegisterScalarMath(OpRegistry *op_registry); +extern void RegisterSelect(OpRegistry *op_registry); +extern void RegisterShape(OpRegistry *op_registry); +extern void RegisterSlice(OpRegistry *op_registry); +extern void RegisterSoftmax(OpRegistry *op_registry); +extern void RegisterSpaceToBatchND(OpRegistry *op_registry); +extern void RegisterSpaceToDepth(OpRegistry *op_registry); +extern void RegisterSplice(OpRegistry *op_registry); +extern void RegisterSplit(OpRegistry *op_registry); +extern void RegisterSqrDiffMean(OpRegistry *op_registry); +extern void RegisterSqueeze(OpRegistry *op_registry); +extern void RegisterStack(OpRegistry *op_registry); +extern void RegisterStridedSlice(OpRegistry *op_registry); +extern void RegisterSubsample(OpRegistry *op_registry); +extern void RegisterSumGroup(OpRegistry *op_registry); +extern void RegisterTargetRMSNorm(OpRegistry *op_registry); +extern void RegisterTile(OpRegistry *op_registry); +extern void RegisterTranspose(OpRegistry *op_registry); +extern void RegisterUnstack(OpRegistry *op_registry); +extern void RegisterUnsqueeze(OpRegistry *op_registry); #ifdef MACE_ENABLE_QUANTIZE -extern void RegisterDequantize(OpRegistryBase *op_registry); -extern void RegisterQuantize(OpRegistryBase *op_registry); +extern void RegisterDequantize(OpRegistry *op_registry); +extern void RegisterQuantize(OpRegistry *op_registry); #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL -extern void RegisterBufferTransform(OpRegistryBase *op_registry); -extern void RegisterLSTMCell(OpRegistryBase *op_registry); +extern void RegisterBufferTransform(OpRegistry *op_registry); +extern void RegisterLSTMCell(OpRegistry *op_registry); #endif // MACE_ENABLE_OPENCL -} // namespace ops -OpRegistry::OpRegistry() : OpRegistryBase() { +void RegisterAllOps(OpRegistry *registry) { // Keep in lexicographical order - ops::RegisterActivation(this); - ops::RegisterAddN(this); - ops::RegisterArgMax(this); - ops::RegisterBatchNorm(this); - ops::RegisterBatchToSpaceND(this); - ops::RegisterBiasAdd(this); - ops::RegisterCast(this); - ops::RegisterChannelShuffle(this); - ops::RegisterConcat(this); - ops::RegisterConv2D(this); - ops::RegisterCrop(this); - ops::RegisterCumsum(this); - ops::RegisterDeconv2D(this); - ops::RegisterDepthToSpace(this); - ops::RegisterDepthwiseConv2d(this); - ops::RegisterDepthwiseDeconv2d(this); - ops::RegisterDynamicLSTM(this); - ops::RegisterEltwise(this); - ops::RegisterExpandDims(this); - ops::RegisterExtractPooling(this); - ops::RegisterFill(this); - ops::RegisterFullyConnected(this); - ops::RegisterGather(this); - ops::RegisterIdentity(this); - ops::RegisterIfDefined(this); - ops::RegisterInferConv2dShape(this); - ops::RegisterKaldiBatchNorm(this); - ops::RegisterLocalResponseNorm(this); - 
ops::RegisterLpNorm(this); - ops::RegisterLSTMNonlinear(this); - ops::RegisterMatMul(this); - ops::RegisterMVNorm(this); - ops::RegisterOneHot(this); - ops::RegisterPad(this); - ops::RegisterPadContext(this); - ops::RegisterPNorm(this); - ops::RegisterPooling(this); - ops::RegisterReduce(this); - ops::RegisterReplaceIndex(this); - ops::RegisterPriorBox(this); - ops::RegisterReshape(this); - ops::RegisterResizeBicubic(this); - ops::RegisterResizeBilinear(this); - ops::RegisterResizeNearestNeighbor(this); - ops::RegisterReverse(this); - ops::RegisterScalarMath(this); - ops::RegisterSelect(this); - ops::RegisterShape(this); - ops::RegisterSlice(this); - ops::RegisterSoftmax(this); - ops::RegisterSpaceToBatchND(this); - ops::RegisterSpaceToDepth(this); - ops::RegisterSplice(this); - ops::RegisterSplit(this); - ops::RegisterStack(this); - ops::RegisterStridedSlice(this); - ops::RegisterSqrDiffMean(this); - ops::RegisterSqueeze(this); - ops::RegisterSubsample(this); - ops::RegisterSumGroup(this); - ops::RegisterTargetRMSNorm(this); - ops::RegisterTile(this); - ops::RegisterTranspose(this); - ops::RegisterUnstack(this); - ops::RegisterUnsqueeze(this); + ops::RegisterActivation(registry); + ops::RegisterAddN(registry); + ops::RegisterArgMax(registry); + ops::RegisterBatchNorm(registry); + ops::RegisterBatchToSpaceND(registry); + ops::RegisterBiasAdd(registry); + ops::RegisterCast(registry); + ops::RegisterChannelShuffle(registry); + ops::RegisterConcat(registry); + ops::RegisterConv2D(registry); + ops::RegisterCrop(registry); + ops::RegisterCumsum(registry); + ops::RegisterDeconv2D(registry); + ops::RegisterDepthToSpace(registry); + ops::RegisterDepthwiseConv2d(registry); + ops::RegisterDepthwiseDeconv2d(registry); + ops::RegisterDynamicLSTM(registry); + ops::RegisterEltwise(registry); + ops::RegisterExpandDims(registry); + ops::RegisterExtractPooling(registry); + ops::RegisterFill(registry); + ops::RegisterFullyConnected(registry); + ops::RegisterGather(registry); + ops::RegisterIdentity(registry); + ops::RegisterIfDefined(registry); + ops::RegisterInferConv2dShape(registry); + ops::RegisterKaldiBatchNorm(registry); + ops::RegisterLocalResponseNorm(registry); + ops::RegisterLpNorm(registry); + ops::RegisterLSTMNonlinear(registry); + ops::RegisterMatMul(registry); + ops::RegisterMVNorm(registry); + ops::RegisterOneHot(registry); + ops::RegisterPad(registry); + ops::RegisterPadContext(registry); + ops::RegisterPNorm(registry); + ops::RegisterPooling(registry); + ops::RegisterReduce(registry); + ops::RegisterReplaceIndex(registry); + ops::RegisterPriorBox(registry); + ops::RegisterReshape(registry); + ops::RegisterResizeBicubic(registry); + ops::RegisterResizeBilinear(registry); + ops::RegisterResizeNearestNeighbor(registry); + ops::RegisterReverse(registry); + ops::RegisterScalarMath(registry); + ops::RegisterSelect(registry); + ops::RegisterShape(registry); + ops::RegisterSlice(registry); + ops::RegisterSoftmax(registry); + ops::RegisterSpaceToBatchND(registry); + ops::RegisterSpaceToDepth(registry); + ops::RegisterSplice(registry); + ops::RegisterSplit(registry); + ops::RegisterStack(registry); + ops::RegisterStridedSlice(registry); + ops::RegisterSqrDiffMean(registry); + ops::RegisterSqueeze(registry); + ops::RegisterSubsample(registry); + ops::RegisterSumGroup(registry); + ops::RegisterTargetRMSNorm(registry); + ops::RegisterTile(registry); + ops::RegisterTranspose(registry); + ops::RegisterUnstack(registry); + ops::RegisterUnsqueeze(registry); #ifdef MACE_ENABLE_QUANTIZE - 
ops::RegisterDequantize(this); - ops::RegisterQuantize(this); + ops::RegisterDequantize(registry); + ops::RegisterQuantize(registry); #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL - ops::RegisterBufferTransform(this); - ops::RegisterLSTMCell(this); + ops::RegisterBufferTransform(registry); + ops::RegisterLSTMCell(registry); #endif // MACE_ENABLE_OPENCL } +} // namespace ops } // namespace mace diff --git a/mace/ops/registry/ops_registry.h b/mace/ops/registry/registry.h similarity index 68% rename from mace/ops/registry/ops_registry.h rename to mace/ops/registry/registry.h index 01f013dc..ed8d55f4 100644 --- a/mace/ops/registry/ops_registry.h +++ b/mace/ops/registry/registry.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_OPS_REGISTRY_OPS_REGISTRY_H_ -#define MACE_OPS_REGISTRY_OPS_REGISTRY_H_ - -#include "mace/core/operator.h" +#ifndef MACE_OPS_REGISTRY_REGISTRY_H_ +#define MACE_OPS_REGISTRY_REGISTRY_H_ namespace mace { +class OpRegistry; +class OpDelegatorRegistry; + +namespace ops { -class OpRegistry : public OpRegistryBase { - public: - OpRegistry(); - ~OpRegistry() = default; -}; +void RegisterAllOps(OpRegistry *registry); +void RegisterAllOpDelegators(OpDelegatorRegistry *registry); +} // namespace ops } // namespace mace -#endif // MACE_OPS_REGISTRY_OPS_REGISTRY_H_ +#endif // MACE_OPS_REGISTRY_REGISTRY_H_ diff --git a/mace/ops/replace_index.cc b/mace/ops/replace_index.cc index d4f95323..8b2f76db 100644 --- a/mace/ops/replace_index.cc +++ b/mace/ops/replace_index.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -94,7 +95,7 @@ class ReplaceIndexOp : public Operation { std::vector forward_indexes_; }; -void RegisterReplaceIndex(OpRegistryBase *op_registry) { +void RegisterReplaceIndex(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ReplaceIndex", ReplaceIndexOp, DeviceType::CPU, float); } diff --git a/mace/ops/reshape.cc b/mace/ops/reshape.cc index b5daa430..63c91c2e 100644 --- a/mace/ops/reshape.cc +++ b/mace/ops/reshape.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #ifdef MACE_ENABLE_OPENCL @@ -149,7 +150,7 @@ class ReshapeOp : public Operation { }; #endif -void RegisterReshape(OpRegistryBase *op_registry) { +void RegisterReshape(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp, DeviceType::CPU, int32_t); MACE_REGISTER_GPU_OP(op_registry, "Reshape", ReshapeOp); diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 5e48ad39..d5d25eda 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_bicubic.h" @@ -232,7 +233,7 @@ class ResizeBicubicOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeBicubic(OpRegistryBase *op_registry) { +void RegisterResizeBicubic(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeBicubic", ResizeBicubicOp, DeviceType::CPU, float); diff --git 
a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index e209864f..2fa891d1 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" #include "mace/ops/common/utils.h" @@ -366,7 +367,7 @@ class ResizeBilinearOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeBilinear(OpRegistryBase *op_registry) { +void RegisterResizeBilinear(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, DeviceType::CPU, float); diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc index 6ac6b9e7..201c4515 100644 --- a/mace/ops/resize_nearest_neighbor.cc +++ b/mace/ops/resize_nearest_neighbor.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_nearest_neighbor.h" @@ -172,7 +173,7 @@ class ResizeNearestNeighborOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry) { +void RegisterResizeNearestNeighbor(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor", ResizeNearestNeighborOp, DeviceType::CPU, float); diff --git a/mace/ops/reverse.cc b/mace/ops/reverse.cc index df3fe6f0..af9670e3 100644 --- a/mace/ops/reverse.cc +++ b/mace/ops/reverse.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -72,7 +73,7 @@ class ReverseOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterReverse(OpRegistryBase *op_registry) { +void RegisterReverse(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reverse", ReverseOp, DeviceType::CPU, float); } diff --git a/mace/ops/scalar_math.cc b/mace/ops/scalar_math.cc index 07794065..1c273420 100644 --- a/mace/ops/scalar_math.cc +++ b/mace/ops/scalar_math.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/eltwise.h" namespace mace { @@ -154,7 +155,7 @@ class ScalarMathOp : public Operation { int32_t scalar_input_index_; }; -void RegisterScalarMath(OpRegistryBase *op_registry) { +void RegisterScalarMath(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ScalarMath", ScalarMathOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "ScalarMath", ScalarMathOp, diff --git a/mace/ops/select.cc b/mace/ops/select.cc index 4d094e65..5001ba20 100644 --- a/mace/ops/select.cc +++ b/mace/ops/select.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" namespace mace { @@ -204,7 +205,7 @@ class SelectOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterSelect(OpRegistryBase *op_registry) { +void RegisterSelect(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Select", SelectOp, DeviceType::CPU, float); } diff --git a/mace/ops/shape.cc b/mace/ops/shape.cc index dcca202f..0071ec25 100644 --- a/mace/ops/shape.cc +++ b/mace/ops/shape.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -58,7 +59,7 @@ class ShapeOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterShape(OpRegistryBase *op_registry) { +void RegisterShape(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Shape", ShapeOp, DeviceType::CPU, float); } diff --git a/mace/ops/slice.cc b/mace/ops/slice.cc index ac7ca64a..14e71cad 100644 --- a/mace/ops/slice.cc +++ b/mace/ops/slice.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -83,7 +84,7 @@ class SliceOp : public Operation { std::vector ends_; }; -void RegisterSlice(OpRegistryBase *op_registry) { +void RegisterSlice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Slice", SliceOp, DeviceType::CPU, float); } diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index cfbde681..bf7cf202 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/fixpoint.h" @@ -520,7 +521,7 @@ class SoftmaxOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSoftmax(OpRegistryBase *op_registry) { +void RegisterSoftmax(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, DeviceType::CPU, float); diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 156c2132..e5d7ec5c 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_batch.h" #endif // MACE_ENABLE_OPENCL @@ -328,7 +329,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterSpaceToBatchND(OpRegistryBase *op_registry) { +void RegisterSpaceToBatchND(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SpaceToBatchND", SpaceToBatchNDOp, DeviceType::CPU, float); diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index 59c1a342..3653d09a 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_depth.h" #endif // MACE_ENABLE_OPENCL @@ -180,7 +181,7 @@ class SpaceToDepthOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void 
RegisterSpaceToDepth(OpRegistryBase *op_registry) { +void RegisterSpaceToDepth(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SpaceToDepth", SpaceToDepthOp, DeviceType::CPU, float); diff --git a/mace/ops/splice.cc b/mace/ops/splice.cc index 8f9198c0..af153671 100644 --- a/mace/ops/splice.cc +++ b/mace/ops/splice.cc @@ -29,7 +29,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -153,7 +154,7 @@ class SpliceOp : public Operation { std::vector forward_const_indexes_; }; -void RegisterSplice(OpRegistryBase *op_registry) { +void RegisterSplice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Splice", SpliceOp, DeviceType::CPU, float); } diff --git a/mace/ops/split.cc b/mace/ops/split.cc index ffe7172f..bb86aecb 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/split.h" #endif // MACE_ENABLE_OPENCL @@ -128,7 +129,7 @@ class SplitOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSplit(OpRegistryBase *op_registry) { +void RegisterSplit(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Split", SplitOp, DeviceType::CPU, float); diff --git a/mace/ops/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc index 2d85ed98..0e2b8d2b 100644 --- a/mace/ops/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/sqrdiff_mean.h" #endif // MACE_ENABLE_OPENCL @@ -100,7 +101,7 @@ class SqrDiffMeanOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSqrDiffMean(OpRegistryBase *op_registry) { +void RegisterSqrDiffMean(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SqrDiffMean", SqrDiffMeanOp, DeviceType::CPU, float); diff --git a/mace/ops/squeeze.cc b/mace/ops/squeeze.cc index 0c08cfd5..590479dd 100644 --- a/mace/ops/squeeze.cc +++ b/mace/ops/squeeze.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -77,7 +78,7 @@ class SqueezeOp : public SqueezeOpRaw { } }; -void RegisterSqueeze(OpRegistryBase *op_registry) { +void RegisterSqueeze(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float); #ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t); diff --git a/mace/ops/stack.cc b/mace/ops/stack.cc index f49c401a..87cc51a0 100644 --- a/mace/ops/stack.cc +++ b/mace/ops/stack.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -77,7 +78,7 @@ class StackOp : public Operation { int axis_; }; -void RegisterStack(OpRegistryBase *op_registry) { +void RegisterStack(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Stack", StackOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Stack", StackOp, DeviceType::CPU, int32_t); } diff --git a/mace/ops/strided_slice.cc b/mace/ops/strided_slice.cc index 4218d1f7..bf44d5a1 100644 --- 
a/mace/ops/strided_slice.cc +++ b/mace/ops/strided_slice.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -350,7 +351,7 @@ class StridedSliceOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterStridedSlice(OpRegistryBase *op_registry) { +void RegisterStridedSlice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "StridedSlice", StridedSliceOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "StridedSlice", StridedSliceOp, diff --git a/mace/ops/subsample.cc b/mace/ops/subsample.cc index 11835ac9..e3c2977e 100644 --- a/mace/ops/subsample.cc +++ b/mace/ops/subsample.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -100,7 +101,7 @@ class SubsampleOp : public Operation { std::vector forward_indexes_; }; -void RegisterSubsample(OpRegistryBase *op_registry) { +void RegisterSubsample(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Subsample", SubsampleOp, DeviceType::CPU, float); } diff --git a/mace/ops/sum_group.cc b/mace/ops/sum_group.cc index 1b62af7e..b8524a74 100644 --- a/mace/ops/sum_group.cc +++ b/mace/ops/sum_group.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -101,7 +102,7 @@ class SumGroupOp : public Operation { } }; -void RegisterSumGroup(OpRegistryBase *op_registry) { +void RegisterSumGroup(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SumGroup", SumGroupOp, DeviceType::CPU, float); } diff --git a/mace/ops/target_rms_norm.cc b/mace/ops/target_rms_norm.cc index 23535e15..e2b2fa2e 100644 --- a/mace/ops/target_rms_norm.cc +++ b/mace/ops/target_rms_norm.cc @@ -22,7 +22,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -148,7 +149,7 @@ class TargetRMSNormOp : public Operation { int block_dim_; }; -void RegisterTargetRMSNorm(OpRegistryBase *op_registry) { +void RegisterTargetRMSNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "TargetRMSNorm", TargetRMSNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/tile.cc b/mace/ops/tile.cc index 36d0bfe9..c09ca921 100644 --- a/mace/ops/tile.cc +++ b/mace/ops/tile.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/memory.h" namespace mace { @@ -110,7 +111,7 @@ class TileOp : public Operation { int has_data_format_; }; -void RegisterTile(OpRegistryBase *op_registry) { +void RegisterTile(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Tile", TileOp, DeviceType::CPU, float); MACE_REGISTER_OP_CONDITION( op_registry, OpConditionBuilder("Tile").SetDevicePlacerFunc( diff --git a/mace/ops/transpose.cc b/mace/ops/transpose.cc index 4eb41e5b..a366f3d4 100644 --- a/mace/ops/transpose.cc +++ b/mace/ops/transpose.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/transpose.h" namespace mace { @@ -64,7 +65,7 @@ class TransposeOp : public Operation { std::vector dims_; 
}; -void RegisterTranspose(OpRegistryBase *op_registry) { +void RegisterTranspose(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Transpose", TransposeOp, DeviceType::CPU, float); } diff --git a/mace/ops/unsqueeze.cc b/mace/ops/unsqueeze.cc index 9fde2a91..cc28c14d 100644 --- a/mace/ops/unsqueeze.cc +++ b/mace/ops/unsqueeze.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -59,7 +60,7 @@ class UnsqueezeOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterUnsqueeze(OpRegistryBase *op_registry) { +void RegisterUnsqueeze(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Unsqueeze", UnsqueezeOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Unsqueeze", UnsqueezeOp, diff --git a/mace/ops/unstack.cc b/mace/ops/unstack.cc index b2a6eb6c..d0928614 100644 --- a/mace/ops/unstack.cc +++ b/mace/ops/unstack.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -73,7 +74,7 @@ class UnstackOp : public Operation { int axis_; }; -void RegisterUnstack(OpRegistryBase *op_registry) { +void RegisterUnstack(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Unstack", UnstackOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Unstack", UnstackOp, diff --git a/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc b/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc index 2ac04e0c..fc0e7ed7 100644 --- a/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc +++ b/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc @@ -15,7 +15,7 @@ #include #include "mace/utils/statistics.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccbenchmark/mace/ops/quantize_benchmark.cc b/test/ccbenchmark/mace/ops/quantize_benchmark.cc index 0923a293..c43bcacb 100644 --- a/test/ccbenchmark/mace/ops/quantize_benchmark.cc +++ b/test/ccbenchmark/mace/ops/quantize_benchmark.cc @@ -14,7 +14,7 @@ #ifdef MACE_ENABLE_QUANTIZE -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc b/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc index 05eaf21d..791182e8 100644 --- a/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc +++ b/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccunit/mace/ops/arm/fp32/gemm_test.cc b/test/ccunit/mace/ops/arm/fp32/gemm_test.cc index 80572033..65a516f9 100644 --- a/test/ccunit/mace/ops/arm/fp32/gemm_test.cc +++ b/test/ccunit/mace/ops/arm/fp32/gemm_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/ref/gemm.h" #include "mace/ops/testing/test_utils.h" @@ -50,7 +50,7 @@ void TestGemmFloat32(const index_t batch, GenerateRandomRealTypeData(rhs.shape(), rhs_data); GenerateRandomRealTypeData(output.shape(), output_data); } - ::mace::ops::arm::fp32::Gemm gemm; + ::mace::ops::arm::fp32::Gemm gemm((delegator::GemmParam())); utils::ThreadPool thread_pool(1, AFFINITY_NONE); thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); @@ -71,7 +71,7 @@ void TestGemmFloat32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_FLOAT); expected_output.Resize({batch, rows, cols}); - ::mace::ops::ref::Gemm gemm_ref; + ::mace::ops::ref::Gemm gemm_ref((delegator::GemmParam())); gemm_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/arm/fp32/gemv_test.cc b/test/ccunit/mace/ops/arm/fp32/gemv_test.cc index bc97bc3e..3a224ea2 100644 --- a/test/ccunit/mace/ops/arm/fp32/gemv_test.cc +++ b/test/ccunit/mace/ops/arm/fp32/gemv_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemv.h" #include "mace/ops/ref/gemv.h" #include "mace/ops/testing/test_utils.h" @@ -53,7 +53,8 @@ void TestGemvFloat32(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - ::mace::ops::arm::fp32::Gemv gemv; + ::mace::ops::arm::fp32::Gemv gemv = + ::mace::ops::arm::fp32::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -67,7 +68,8 @@ void TestGemvFloat32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_FLOAT); expected_output.Resize({batch, height}); - ::mace::ops::ref::Gemv gemv_ref; + ::mace::ops::ref::Gemv gemv_ref = + ::mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/arm/q8/gemv_test.cc b/test/ccunit/mace/ops/arm/q8/gemv_test.cc index 6216caba..619d343f 100644 --- a/test/ccunit/mace/ops/arm/q8/gemv_test.cc +++ b/test/ccunit/mace/ops/arm/q8/gemv_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/q8/gemv.h" #include "mace/ops/ref/gemv.h" #include "mace/ops/testing/test_utils.h" @@ -58,7 +58,8 @@ void TestGemvInt32(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - mace::ops::arm::q8::Gemv gemv; + mace::ops::arm::q8::Gemv gemv = + mace::ops::arm::q8::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -72,7 +73,8 @@ void TestGemvInt32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_INT32); expected_output.Resize({batch, height}); - mace::ops::ref::Gemv gemv_ref; + mace::ops::ref::Gemv gemv_ref = + mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, @@ -130,7 +132,8 @@ 
void TestGemvUint8(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - mace::ops::arm::q8::Gemv gemv; + mace::ops::arm::q8::Gemv gemv = + mace::ops::arm::q8::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -146,7 +149,8 @@ void TestGemvUint8(const index_t batch, expected_output.SetScale(0.6); expected_output.SetZeroPoint(57); expected_output.Resize({batch, height}); - mace::ops::ref::Gemv gemv_ref; + mace::ops::ref::Gemv gemv_ref = + mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/matmul_test.cc b/test/ccunit/mace/ops/matmul_test.cc index d0432bb0..9d46f0e1 100644 --- a/test/ccunit/mace/ops/matmul_test.cc +++ b/test/ccunit/mace/ops/matmul_test.cc @@ -14,6 +14,7 @@ #include +#include "mace/ops/delegator/gemm.h" #include "mace/ops/ops_test_util.h" #include "mace/ops/ref/gemm.h" @@ -111,7 +112,7 @@ void Complex(const std::vector &batch, .Finalize(net.NewOperatorDef()); net.RunOp(CPU); - ref::Gemm gemm; + ref::Gemm gemm = ref::Gemm(delegator::GemmParam()); Tensor expected_output_tensor; std::vector expected_output_shape({rows, cols}); expected_output_shape.insert(expected_output_shape.begin(), diff --git a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc index 3dfe468a..808ea9aa 100644 --- a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc +++ b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc @@ -16,7 +16,7 @@ #include #include "gtest/gtest.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/gpu_device.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" @@ -134,7 +134,7 @@ TEST(OutOfRangeCheckTest, RandomTest) { std::unique_ptr device = make_unique( gpu_context.opencl_tuner()); - Workspace ws; + Workspace ws(nullptr); OpContext context(&ws, device.get()); std::vector buffer_shape = {batch, height, width, channels}; diff --git a/test/ccunit/mace/ops/sqrdiff_mean_test.cc b/test/ccunit/mace/ops/sqrdiff_mean_test.cc index 3257987c..42375b7d 100644 --- a/test/ccunit/mace/ops/sqrdiff_mean_test.cc +++ b/test/ccunit/mace/ops/sqrdiff_mean_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/test/ccutils/mace/ops/ops_test_util.h b/test/ccutils/mace/ops/ops_test_util.h index a9f8a984..e1e56342 100644 --- a/test/ccutils/mace/ops/ops_test_util.h +++ b/test/ccutils/mace/ops/ops_test_util.h @@ -31,7 +31,9 @@ #include "mace/core/device_context.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/ops/registry/ops_registry.h" +#include "mace/core/registry/ops_registry.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/registry/registry.h" #include "mace/public/mace.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -109,7 +111,12 @@ class OpTestContext { class OpsTestNet { public: OpsTestNet() : - op_registry_(make_unique()) {} + op_registry_(make_unique()), + op_delegator_registry_(make_unique()), + ws_(op_delegator_registry_.get()) { + ops::RegisterAllOps(op_registry_.get()); + ops::RegisterAllOpDelegators(op_delegator_registry_.get()); + } template void AddInputFromArray(const std::string &name, @@ -426,7 +433,8 @@ class OpsTestNet { void Sync(); public: - std::shared_ptr op_registry_; + std::unique_ptr op_registry_; + std::unique_ptr op_delegator_registry_; Workspace ws_; std::vector op_defs_; std::unique_ptr net_; -- GitLab