From 373f1effd1946147832947b55b366a825ca1c490 Mon Sep 17 00:00:00 2001 From: luxuhui Date: Wed, 13 Nov 2019 20:20:56 +0800 Subject: [PATCH] add mace micro N/A Signed-off-by: Luxuhui --- .gitignore | 3 + .gitlab-ci.yml | 9 + WORKSPACE | 13 + docs/index.rst | 7 + docs/micro-controllers/basic_usage.rst | 127 ++ mace/codegen/tools/gen_version_source.sh | 6 +- mace/core/operator.cc | 3 +- mace/proto/BUILD.bazel | 19 +- mace/proto/mace.proto | 2 + mace/proto/micro_mem.proto | 23 + micro/BUILD.bazel | 7 + micro/base/BUILD.bazel | 39 + micro/base/logger.cc | 149 +++ micro/base/logger.h | 57 + micro/base/logging.h | 84 ++ micro/base/serialize.cc | 54 + micro/base/serialize.h | 64 + micro/base/serialize_type.h | 195 +++ micro/base/types.h | 57 + micro/base/utils.cc | 109 ++ micro/base/utils.h | 90 ++ micro/base/value_to_str.cc | 91 ++ micro/base/value_to_str.h | 61 + micro/codegen/BUILD.bazel | 82 ++ micro/framework/BUILD.bazel | 40 + micro/framework/graph.cc | 108 ++ micro/framework/graph.h | 63 + micro/framework/micro_engine.cc | 81 ++ micro/framework/op_context.cc | 53 + micro/framework/op_context.h | 51 + micro/framework/operator.cc | 242 ++++ micro/framework/operator.h | 111 ++ micro/framework/scratch_buffer.cc | 62 + micro/framework/scratch_buffer.h | 52 + micro/include/BUILD.bazel | 31 + micro/include/port/define.h | 33 + micro/include/public/micro.h | 105 ++ micro/include/utils/bfloat16.h | 57 + micro/include/utils/macros.h | 32 + micro/micro.bzl | 37 + micro/model/BUILD.bazel | 20 + micro/model/argument.cc | 30 + micro/model/argument.h | 48 + micro/model/const_tensor.cc | 41 + micro/model/const_tensor.h | 63 + micro/model/input_output_info.cc | 30 + micro/model/input_output_info.h | 50 + micro/model/net_def.cc | 33 + micro/model/net_def.h | 50 + micro/model/operator_def.cc | 33 + micro/model/operator_def.h | 57 + micro/model/output_shape.cc | 35 + micro/model/output_shape.h | 40 + micro/ops/BUILD.bazel | 36 + micro/ops/activation.cc | 69 ++ micro/ops/activation.h | 44 + micro/ops/argmax.h | 129 ++ micro/ops/bias_add.cc | 51 + micro/ops/bias_add.h | 45 + micro/ops/cast.h | 87 ++ micro/ops/eltwise.cc | 54 + micro/ops/eltwise.h | 1089 +++++++++++++++++ micro/ops/expand_dims.cc | 63 + micro/ops/expand_dims.h | 44 + micro/ops/matmul.cc | 163 +++ micro/ops/matmul.h | 63 + micro/ops/nhwc/base/conv_2d_base.cc | 78 ++ micro/ops/nhwc/base/conv_2d_base.h | 55 + micro/ops/nhwc/base/depthwise_conv_2d_base.cc | 44 + micro/ops/nhwc/base/depthwise_conv_2d_base.h | 32 + micro/ops/nhwc/base/filter_op_base.cc | 156 +++ micro/ops/nhwc/base/filter_op_base.h | 66 + micro/ops/nhwc/base/pooling_base.cc | 95 ++ micro/ops/nhwc/base/pooling_base.h | 63 + micro/ops/nhwc/batch_norm.cc | 92 ++ micro/ops/nhwc/batch_norm.h | 54 + micro/ops/nhwc/conv_2d_c2_s4.cc | 166 +++ micro/ops/nhwc/conv_2d_c2_s4.h | 31 + micro/ops/nhwc/conv_2d_c3_s4.cc | 174 +++ micro/ops/nhwc/conv_2d_c3_s4.h | 32 + micro/ops/nhwc/conv_2d_c4_s4.cc | 185 +++ micro/ops/nhwc/conv_2d_c4_s4.h | 31 + micro/ops/nhwc/conv_2d_ref.cc | 79 ++ micro/ops/nhwc/conv_2d_ref.h | 32 + micro/ops/nhwc/depthwise_conv_2d_kb1_s4.cc | 161 +++ micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h | 31 + micro/ops/nhwc/depthwise_conv_2d_kb2_s4.cc | 181 +++ micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h | 31 + micro/ops/nhwc/depthwise_conv_2d_kb3_s4.cc | 189 +++ micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h | 31 + micro/ops/nhwc/depthwise_conv_2d_kb4_s4.cc | 199 +++ micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h | 31 + micro/ops/nhwc/depthwise_conv_2d_ref.cc | 80 ++ 
micro/ops/nhwc/depthwise_conv_2d_ref.h | 32 + micro/ops/nhwc/pooling_ref.cc | 130 ++ micro/ops/nhwc/pooling_ref.h | 37 + micro/ops/nhwc/pooling_s4.cc | 225 ++++ micro/ops/nhwc/pooling_s4.h | 37 + micro/ops/reduce.cc | 45 + micro/ops/reduce.h | 575 +++++++++ micro/ops/reshape.cc | 108 ++ micro/ops/reshape.h | 45 + micro/ops/shape.cc | 47 + micro/ops/shape.h | 39 + micro/ops/softmax.cc | 91 ++ micro/ops/softmax.h | 47 + micro/ops/squeeze.cc | 77 ++ micro/ops/squeeze.h | 46 + micro/ops/stack.h | 94 ++ micro/ops/strided_slice.h | 293 +++++ micro/ops/utils/activation.cc | 125 ++ micro/ops/utils/activation.h | 65 + micro/ops/utils/crumb_utils.cc | 43 + micro/ops/utils/crumb_utils.h | 35 + micro/ops/utils/gemm.cc | 199 +++ micro/ops/utils/gemm.h | 82 ++ micro/ops/utils/gemv.cc | 131 ++ micro/ops/utils/gemv.h | 64 + micro/ops/utils/matrix.h | 109 ++ micro/port/BUILD.bazel | 27 + micro/port/api.cc | 59 + micro/port/api.h | 32 + micro/test/ccbaseline/BUILD.bazel | 109 ++ micro/test/ccbaseline/macemc/rpc/macemc.idl | 20 + .../test/ccbaseline/macemc/rpc/skel/macemc.cc | 28 + .../macemc/rpc/skel/micro_tester.cc | 88 ++ .../test/ccbaseline/macemc/rpc/stub/macemc.cc | 40 + .../test/ccbaseline/macemc/rpc/stub/macemc.h | 36 + micro/test/ccbaseline/test_baseline_main.cc | 22 + micro/test/ccbenchmark/BUILD.bazel | 149 +++ .../micro/benchmark_utils/test_benchmark.cc | 190 +++ .../micro/benchmark_utils/test_benchmark.h | 56 + .../micro/ops/activation_benchmark.cc | 103 ++ .../micro/ops/bias_add_benchmark.cc | 79 ++ .../micro/ops/eltwise_benchmark.cc | 83 ++ .../ccbenchmark/micro/ops/matmul_benchmark.cc | 134 ++ .../micro/ops/nhwc/batch_norm_benchmark.cc | 86 ++ .../micro/ops/nhwc/conv_2d_benchmark.cc | 112 ++ .../micro/ops/nhwc/conv_2d_opt_benchmark.cc | 112 ++ .../ops/nhwc/depthwise_conv_2d_benchmark.cc | 112 ++ .../nhwc/depthwise_conv_2d_opt_benchmark.cc | 114 ++ .../micro/ops/nhwc/pooling_benchmark.cc | 88 ++ .../ccbenchmark/micro/ops/reduce_benchmark.cc | 76 ++ .../micro/ops/softmax_benchmark.cc | 73 ++ .../test/ccbenchmark/micro/rpc/benchmark.idl | 20 + .../ccbenchmark/micro/rpc/skel/benchmark.c | 27 + .../ccbenchmark/micro/rpc/stub/benchmark.cc | 40 + .../ccbenchmark/micro/rpc/stub/benchmark.h | 36 + .../ccbenchmark/micro/test_benchmark_main.cc | 27 + micro/test/ccunit/BUILD.bazel | 53 + .../test/ccunit/micro/codegen/engine_test.cc | 56 + .../test/ccunit/micro/framework/graph_test.cc | 113 ++ micro/test/ccunit/micro/model/net_def_test.cc | 167 +++ .../test/ccunit/micro/ops/activation_test.cc | 249 ++++ micro/test/ccunit/micro/ops/argmax_test.cc | 111 ++ micro/test/ccunit/micro/ops/bias_add_test.cc | 62 + micro/test/ccunit/micro/ops/eltwise_test.cc | 499 ++++++++ .../test/ccunit/micro/ops/expand_dims_test.cc | 82 ++ micro/test/ccunit/micro/ops/matmul_test.cc | 99 ++ .../ccunit/micro/ops/nhwc/batch_norm_test.cc | 72 ++ .../ccunit/micro/ops/nhwc/conv_2d_opt_test.cc | 264 ++++ .../ccunit/micro/ops/nhwc/conv_2d_test.cc | 320 +++++ .../ops/nhwc/depthwise_conv_2d_opt_test.cc | 251 ++++ .../micro/ops/nhwc/depthwise_conv_2d_test.cc | 112 ++ .../ccunit/micro/ops/nhwc/pooling_test.cc | 208 ++++ micro/test/ccunit/micro/ops/reduce_test.cc | 269 ++++ micro/test/ccunit/micro/ops/reshape_test.cc | 68 + micro/test/ccunit/micro/ops/shape_test.cc | 62 + micro/test/ccunit/micro/ops/softmax_test.cc | 63 + micro/test/ccunit/micro/ops/squeeze_test.cc | 89 ++ micro/test/ccunit/micro/ops/stack_test.cc | 140 +++ .../ccunit/micro/ops/strided_slice_test.cc | 495 ++++++++ micro/test/ccutils/BUILD.bazel | 101 ++ 
.../ccutils/micro/common/global_buffer.cc | 60 + .../test/ccutils/micro/common/global_buffer.h | 57 + micro/test/ccutils/micro/ops/gtest_utils.h | 161 +++ micro/test/ccutils/micro/ops/operator.test.cc | 134 ++ micro/test/ccutils/micro/ops/substitute_op.cc | 112 ++ micro/test/ccutils/micro/ops/substitute_op.h | 140 +++ micro/test/ccutils/micro/ops/test_utils.cc | 74 ++ micro/test/ccutils/micro/ops/test_utils.h | 46 + micro/test/ccutils/rpc/qaic.sh | 22 + micro/test/ccutils/rpc/skel/base_func.cc | 40 + micro/test/ccutils/rpc/skel/base_func.h | 62 + micro/test/ccutils/rpc/stub/base_handle.cc | 69 ++ micro/test/ccutils/rpc/stub/base_handle.h | 53 + micro/tools/BUILD.bazel | 20 + micro/tools/micro_run.cc | 351 ++++++ .../mace/benchmark_utils/test_benchmark.h | 2 +- third_party/hexagon/hexagon_sdk.BUILD | 58 + third_party/hexagon/hexagon_tools.BUILD | 88 ++ tools/bazel.rc | 19 + tools/converter.py | 30 +- tools/cpplint.sh | 2 + tools/hexagon_compiler/BUILD.bazel | 53 + tools/hexagon_compiler/CROSSTOOL | 95 ++ .../hexagon_compiler/hexagon_gcc/BUILD.bazel | 84 ++ .../hexagon_gcc/hexagon-qualcomm-ar | 5 + .../hexagon_gcc/hexagon-qualcomm-as | 5 + .../hexagon_gcc/hexagon-qualcomm-gcc | 5 + .../hexagon_gcc/hexagon-qualcomm-gcov | 5 + .../hexagon_gcc/hexagon-qualcomm-ld | 5 + .../hexagon_gcc/hexagon-qualcomm-nm | 5 + .../hexagon_gcc/hexagon-qualcomm-objcopy | 5 + .../hexagon_gcc/hexagon-qualcomm-objdump | 5 + .../hexagon_gcc/hexagon-qualcomm-strip | 5 + tools/python/convert.py | 71 +- tools/python/layers_validate.py | 235 ++++ tools/python/micro/__init__.py | 0 tools/python/micro/graph_builder.py | 98 ++ .../micro_engine_c_interface.cc.jinja2 | 61 + .../micro_engine_c_interface.h.jinja2 | 40 + .../micro_engine_config.cc.jinja2 | 58 + .../micro_engine_factory.cc.jinja2 | 43 + .../micro_engine_factory.h.jinja2 | 27 + .../jinja2_files/micro_graph_data.h.jinja2 | 28 + .../jinja2_files/micro_model_data.h.jinja2 | 28 + .../micro/jinja2_files/micro_net_def.h.jinja2 | 28 + .../jinja2_files/micro_ops_list.h.jinja2 | 38 + tools/python/micro/mem_computer.py | 138 +++ tools/python/micro/micro_codegen.py | 103 ++ tools/python/micro/micro_io_converter.py | 99 ++ tools/python/micro/micro_op_converter.py | 58 + tools/python/micro/micro_support_ops.py | 281 +++++ tools/python/micro/proto_to_bytes.py | 219 ++++ tools/python/micro/scratch_computer.py | 128 ++ tools/python/micro_converter.py | 153 +++ tools/python/py_proto/__init__.py | 4 + tools/python/run_micro.py | 292 +++++ tools/python/template/file_binary.cc.jinja2 | 2 +- tools/python/utils/config_parser.py | 6 +- tools/python/utils/convert_util.py | 105 ++ tools/python/utils/device.py | 10 +- tools/python/validate.py | 16 +- 234 files changed, 20546 insertions(+), 79 deletions(-) create mode 100644 docs/micro-controllers/basic_usage.rst create mode 100644 mace/proto/micro_mem.proto create mode 100644 micro/BUILD.bazel create mode 100644 micro/base/BUILD.bazel create mode 100644 micro/base/logger.cc create mode 100644 micro/base/logger.h create mode 100644 micro/base/logging.h create mode 100644 micro/base/serialize.cc create mode 100644 micro/base/serialize.h create mode 100644 micro/base/serialize_type.h create mode 100644 micro/base/types.h create mode 100644 micro/base/utils.cc create mode 100644 micro/base/utils.h create mode 100644 micro/base/value_to_str.cc create mode 100644 micro/base/value_to_str.h create mode 100644 micro/codegen/BUILD.bazel create mode 100644 micro/framework/BUILD.bazel create mode 100644 micro/framework/graph.cc create mode 100644 
micro/framework/graph.h create mode 100644 micro/framework/micro_engine.cc create mode 100644 micro/framework/op_context.cc create mode 100644 micro/framework/op_context.h create mode 100644 micro/framework/operator.cc create mode 100644 micro/framework/operator.h create mode 100644 micro/framework/scratch_buffer.cc create mode 100644 micro/framework/scratch_buffer.h create mode 100644 micro/include/BUILD.bazel create mode 100644 micro/include/port/define.h create mode 100644 micro/include/public/micro.h create mode 100644 micro/include/utils/bfloat16.h create mode 100644 micro/include/utils/macros.h create mode 100644 micro/micro.bzl create mode 100644 micro/model/BUILD.bazel create mode 100644 micro/model/argument.cc create mode 100644 micro/model/argument.h create mode 100644 micro/model/const_tensor.cc create mode 100644 micro/model/const_tensor.h create mode 100644 micro/model/input_output_info.cc create mode 100644 micro/model/input_output_info.h create mode 100644 micro/model/net_def.cc create mode 100644 micro/model/net_def.h create mode 100644 micro/model/operator_def.cc create mode 100644 micro/model/operator_def.h create mode 100644 micro/model/output_shape.cc create mode 100644 micro/model/output_shape.h create mode 100644 micro/ops/BUILD.bazel create mode 100644 micro/ops/activation.cc create mode 100644 micro/ops/activation.h create mode 100644 micro/ops/argmax.h create mode 100644 micro/ops/bias_add.cc create mode 100644 micro/ops/bias_add.h create mode 100644 micro/ops/cast.h create mode 100644 micro/ops/eltwise.cc create mode 100644 micro/ops/eltwise.h create mode 100644 micro/ops/expand_dims.cc create mode 100644 micro/ops/expand_dims.h create mode 100644 micro/ops/matmul.cc create mode 100644 micro/ops/matmul.h create mode 100644 micro/ops/nhwc/base/conv_2d_base.cc create mode 100644 micro/ops/nhwc/base/conv_2d_base.h create mode 100644 micro/ops/nhwc/base/depthwise_conv_2d_base.cc create mode 100644 micro/ops/nhwc/base/depthwise_conv_2d_base.h create mode 100644 micro/ops/nhwc/base/filter_op_base.cc create mode 100644 micro/ops/nhwc/base/filter_op_base.h create mode 100644 micro/ops/nhwc/base/pooling_base.cc create mode 100644 micro/ops/nhwc/base/pooling_base.h create mode 100644 micro/ops/nhwc/batch_norm.cc create mode 100644 micro/ops/nhwc/batch_norm.h create mode 100644 micro/ops/nhwc/conv_2d_c2_s4.cc create mode 100644 micro/ops/nhwc/conv_2d_c2_s4.h create mode 100644 micro/ops/nhwc/conv_2d_c3_s4.cc create mode 100644 micro/ops/nhwc/conv_2d_c3_s4.h create mode 100644 micro/ops/nhwc/conv_2d_c4_s4.cc create mode 100644 micro/ops/nhwc/conv_2d_c4_s4.h create mode 100644 micro/ops/nhwc/conv_2d_ref.cc create mode 100644 micro/ops/nhwc/conv_2d_ref.h create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb1_s4.cc create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb2_s4.cc create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb3_s4.cc create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb4_s4.cc create mode 100644 micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h create mode 100644 micro/ops/nhwc/depthwise_conv_2d_ref.cc create mode 100644 micro/ops/nhwc/depthwise_conv_2d_ref.h create mode 100644 micro/ops/nhwc/pooling_ref.cc create mode 100644 micro/ops/nhwc/pooling_ref.h create mode 100644 micro/ops/nhwc/pooling_s4.cc create mode 100644 micro/ops/nhwc/pooling_s4.h create mode 100644 micro/ops/reduce.cc 
create mode 100644 micro/ops/reduce.h create mode 100644 micro/ops/reshape.cc create mode 100644 micro/ops/reshape.h create mode 100644 micro/ops/shape.cc create mode 100644 micro/ops/shape.h create mode 100644 micro/ops/softmax.cc create mode 100644 micro/ops/softmax.h create mode 100644 micro/ops/squeeze.cc create mode 100644 micro/ops/squeeze.h create mode 100644 micro/ops/stack.h create mode 100644 micro/ops/strided_slice.h create mode 100644 micro/ops/utils/activation.cc create mode 100644 micro/ops/utils/activation.h create mode 100644 micro/ops/utils/crumb_utils.cc create mode 100644 micro/ops/utils/crumb_utils.h create mode 100644 micro/ops/utils/gemm.cc create mode 100644 micro/ops/utils/gemm.h create mode 100644 micro/ops/utils/gemv.cc create mode 100644 micro/ops/utils/gemv.h create mode 100644 micro/ops/utils/matrix.h create mode 100644 micro/port/BUILD.bazel create mode 100644 micro/port/api.cc create mode 100644 micro/port/api.h create mode 100644 micro/test/ccbaseline/BUILD.bazel create mode 100755 micro/test/ccbaseline/macemc/rpc/macemc.idl create mode 100644 micro/test/ccbaseline/macemc/rpc/skel/macemc.cc create mode 100644 micro/test/ccbaseline/macemc/rpc/skel/micro_tester.cc create mode 100644 micro/test/ccbaseline/macemc/rpc/stub/macemc.cc create mode 100644 micro/test/ccbaseline/macemc/rpc/stub/macemc.h create mode 100644 micro/test/ccbaseline/test_baseline_main.cc create mode 100644 micro/test/ccbenchmark/BUILD.bazel create mode 100644 micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.h create mode 100644 micro/test/ccbenchmark/micro/ops/activation_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/bias_add_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/eltwise_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/matmul_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/batch_norm_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_opt_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_opt_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/nhwc/pooling_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/reduce_benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/ops/softmax_benchmark.cc create mode 100755 micro/test/ccbenchmark/micro/rpc/benchmark.idl create mode 100644 micro/test/ccbenchmark/micro/rpc/skel/benchmark.c create mode 100644 micro/test/ccbenchmark/micro/rpc/stub/benchmark.cc create mode 100644 micro/test/ccbenchmark/micro/rpc/stub/benchmark.h create mode 100644 micro/test/ccbenchmark/micro/test_benchmark_main.cc create mode 100644 micro/test/ccunit/BUILD.bazel create mode 100644 micro/test/ccunit/micro/codegen/engine_test.cc create mode 100644 micro/test/ccunit/micro/framework/graph_test.cc create mode 100644 micro/test/ccunit/micro/model/net_def_test.cc create mode 100644 micro/test/ccunit/micro/ops/activation_test.cc create mode 100644 micro/test/ccunit/micro/ops/argmax_test.cc create mode 100644 micro/test/ccunit/micro/ops/bias_add_test.cc create mode 100644 micro/test/ccunit/micro/ops/eltwise_test.cc create mode 100644 micro/test/ccunit/micro/ops/expand_dims_test.cc create mode 100644 micro/test/ccunit/micro/ops/matmul_test.cc create mode 
100644 micro/test/ccunit/micro/ops/nhwc/batch_norm_test.cc create mode 100644 micro/test/ccunit/micro/ops/nhwc/conv_2d_opt_test.cc create mode 100644 micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc create mode 100644 micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_opt_test.cc create mode 100644 micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc create mode 100644 micro/test/ccunit/micro/ops/nhwc/pooling_test.cc create mode 100644 micro/test/ccunit/micro/ops/reduce_test.cc create mode 100644 micro/test/ccunit/micro/ops/reshape_test.cc create mode 100644 micro/test/ccunit/micro/ops/shape_test.cc create mode 100644 micro/test/ccunit/micro/ops/softmax_test.cc create mode 100644 micro/test/ccunit/micro/ops/squeeze_test.cc create mode 100644 micro/test/ccunit/micro/ops/stack_test.cc create mode 100644 micro/test/ccunit/micro/ops/strided_slice_test.cc create mode 100644 micro/test/ccutils/BUILD.bazel create mode 100644 micro/test/ccutils/micro/common/global_buffer.cc create mode 100644 micro/test/ccutils/micro/common/global_buffer.h create mode 100644 micro/test/ccutils/micro/ops/gtest_utils.h create mode 100644 micro/test/ccutils/micro/ops/operator.test.cc create mode 100644 micro/test/ccutils/micro/ops/substitute_op.cc create mode 100644 micro/test/ccutils/micro/ops/substitute_op.h create mode 100644 micro/test/ccutils/micro/ops/test_utils.cc create mode 100644 micro/test/ccutils/micro/ops/test_utils.h create mode 100644 micro/test/ccutils/rpc/qaic.sh create mode 100644 micro/test/ccutils/rpc/skel/base_func.cc create mode 100644 micro/test/ccutils/rpc/skel/base_func.h create mode 100644 micro/test/ccutils/rpc/stub/base_handle.cc create mode 100644 micro/test/ccutils/rpc/stub/base_handle.h create mode 100644 micro/tools/BUILD.bazel create mode 100644 micro/tools/micro_run.cc create mode 100644 third_party/hexagon/hexagon_sdk.BUILD create mode 100644 third_party/hexagon/hexagon_tools.BUILD create mode 100644 tools/hexagon_compiler/BUILD.bazel create mode 100644 tools/hexagon_compiler/CROSSTOOL create mode 100644 tools/hexagon_compiler/hexagon_gcc/BUILD.bazel create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ar create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-as create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcc create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcov create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ld create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-nm create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objcopy create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objdump create mode 100755 tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-strip create mode 100644 tools/python/layers_validate.py create mode 100644 tools/python/micro/__init__.py create mode 100644 tools/python/micro/graph_builder.py create mode 100644 tools/python/micro/jinja2_files/micro_engine_c_interface.cc.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_engine_c_interface.h.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_engine_config.cc.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_engine_factory.cc.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_engine_factory.h.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_model_data.h.jinja2 create mode 100644 
tools/python/micro/jinja2_files/micro_net_def.h.jinja2 create mode 100644 tools/python/micro/jinja2_files/micro_ops_list.h.jinja2 create mode 100644 tools/python/micro/mem_computer.py create mode 100644 tools/python/micro/micro_codegen.py create mode 100644 tools/python/micro/micro_io_converter.py create mode 100644 tools/python/micro/micro_op_converter.py create mode 100644 tools/python/micro/micro_support_ops.py create mode 100644 tools/python/micro/proto_to_bytes.py create mode 100644 tools/python/micro/scratch_computer.py create mode 100644 tools/python/micro_converter.py create mode 100644 tools/python/run_micro.py create mode 100644 tools/python/utils/convert_util.py diff --git a/.gitignore b/.gitignore index c02dd020..91e5e303 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ mace/codegen/version/ mace/codegen/engine/ mace/codegen/lib/ +micro/codegen/models/ +micro/codegen/engines/ + examples/android/macelibrary/src/main/cpp/mace/ examples/android/macelibrary/src/main/cpp/include/ examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 52cb2a3f..8a954bd4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,12 +80,14 @@ mace_cc_test: DEVICE_CONF_FILE=generic-mobile-devices/devices.yml fi - python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS + - python tools/bazel_adb_run.py --target="//micro/test/ccunit:micro_ops_test" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a mace_cc_benchmark: stage: test script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*" + - python tools/bazel_adb_run.py --target="//micro/test/ccbenchmark:micro_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a only: - triggers @@ -112,6 +114,13 @@ model_tests: - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark + - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml + - python tools/converter.py convert --config=${CONF_FILE} --enable_micro + - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn + - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark + - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml + - python tools/converter.py convert --config=${CONF_FILE} --enable_micro + - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn - rm -rf mace-models quantization_tests: diff --git a/WORKSPACE b/WORKSPACE index 2bbd89da..9dfe8c89 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -3,6 +3,7 @@ workspace(name = "mace") # generate version and opencl kernel code. 
load("//repository/git:git_configure.bzl", "git_version_repository") load("//repository/opencl-kernel:opencl_kernel_configure.bzl", "encrypt_opencl_kernel_repository") +load("//micro:micro.bzl", "new_local_repository_env") git_version_repository(name = "local_version_config") @@ -161,3 +162,15 @@ new_http_archive( "https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/aarch64-linux-gnu/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz", ], ) + +new_local_repository_env( + name = "hexagon_sdk", + build_file = "third_party/hexagon/hexagon_sdk.BUILD", + path = "${HEXAGON_SDK_ROOT}", +) + +new_local_repository_env( + name = "hexagon_tools", + build_file = "third_party/hexagon/hexagon_tools.BUILD", + path = "${HL_HEXAGON_TOOLS}", +) diff --git a/docs/index.rst b/docs/index.rst index 58c906a1..392e1ccc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,13 @@ The main documentation is organized into the following sections: development/data_format development/dynamic_lstm +.. toctree:: + :maxdepth: 1 + :caption: Micro Controllers + :name: sec-micro + + micro-controllers/basic_usage.rst + .. toctree:: :maxdepth: 1 :caption: FAQ diff --git a/docs/micro-controllers/basic_usage.rst b/docs/micro-controllers/basic_usage.rst new file mode 100644 index 00000000..f2a510b7 --- /dev/null +++ b/docs/micro-controllers/basic_usage.rst @@ -0,0 +1,127 @@ +Basic usage for Micro Controllers +================================== + + +Build and run an example model +------------------------------- + +At first, make sure the environment has been set up correctly already (refer to :doc:`../installation/env_requirement`). + +The followings are instructions about how to quickly build and run a provided model in +`MACE Model Zoo `__. + +Here we use the har-cnn model as an example. + +**Commands** + + 1. Pull `MACE `__ project. + + .. code-block:: sh + + git clone https://github.com/XiaoMi/mace.git + cd mace/ + git fetch --all --tags --prune + + # Checkout the latest tag (i.e. release version) + tag_name=`git describe --abbrev=0 --tags` + git checkout tags/${tag_name} + + .. note:: + + It's highly recommended to use a release version instead of master branch. + + + 2. Pull `MACE Model Zoo `__ project. + + .. code-block:: sh + + git clone https://github.com/XiaoMi/mace-models.git + + + 3. Convert the pre-trained har-cnn model to c++ code. + + .. code-block:: sh + + cd path/to/mace + # output lib path: build/har-cnn/model/har_cnn_micro.tar.gz + CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml + python tools/converter.py convert --config=$CONF_FILE --enable_micro + + + 4. Build Micro-Controllers engine and models to library on host. + + .. code-block:: sh + + # copy convert result to micro dir ``path/to/micro`` + cp build/har-cnn/model/har_cnn_micro.tar.gz path/to/micro/ + cd path/to/micro + tar zxvf har_cnn_micro.tar.gz + bazel build //micro/codegen:micro_engine + + .. note:: + + - This step can be skipped if you just want to run a model using ``tools/python/run_micro.py``, such as commands in step 5. + + - The build result ``bazel-bin/micro/codegen/libmicro_engine.so``'s abi is host, if you want to run the model on micro controllers, you should build the code with the target abi. + + 5. Run the model on host. + + .. 
diff --git a/mace/codegen/tools/gen_version_source.sh b/mace/codegen/tools/gen_version_source.sh
index a8a6af78..9d24e015 100755
--- a/mace/codegen/tools/gen_version_source.sh
+++ b/mace/codegen/tools/gen_version_source.sh
@@ -53,10 +53,14 @@ cat <<EOF > ${OUTPUT_FILENAME}
 // This is a generated file. DO NOT EDIT!
namespace mace { +namespace { #ifndef _MSC_VER __attribute__((visibility("default"))) #endif -const char *MaceVersion() { return "MACEVER-${GIT_VERSION}" + 8; } + const char *kMaceVersion = "MACEVER-${GIT_VERSION}"; +} // namespace + +const char *MaceVersion() { return kMaceVersion + 8; } } // namespace mace EOF diff --git a/mace/core/operator.cc b/mace/core/operator.cc index 11ab742e..a266ce2b 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -322,7 +322,8 @@ std::unique_ptr OpRegistryBase::CreateOperation( .TypeConstraint("T", key_dtype) .Build(); if (registry_.at(op_type)->creators.count(key) == 0) { - LOG(FATAL) << "Key not registered: " << key; + LOG(FATAL) << "Key not registered: " << key + << ", op type is: " << operator_def->type(); } return registry_.at(op_type)->creators.at(key)(context); } diff --git a/mace/proto/BUILD.bazel b/mace/proto/BUILD.bazel index c3004a4f..feb1c145 100644 --- a/mace/proto/BUILD.bazel +++ b/mace/proto/BUILD.bazel @@ -8,9 +8,11 @@ package( licenses(["notice"]) # Apache 2.0 -load("@com_google_protobuf//:protobuf.bzl", - "py_proto_library", - "cc_proto_library") +load( + "@com_google_protobuf//:protobuf.bzl", + "cc_proto_library", + "py_proto_library", +) py_proto_library( name = "mace_py", @@ -27,3 +29,14 @@ cc_proto_library( default_runtime = "@com_google_protobuf//:protobuf_lite", protoc = "@com_google_protobuf//:protoc", ) + +py_proto_library( + name = "micro_mem_py", + srcs = ["micro_mem.proto"], + default_runtime = "@com_google_protobuf//:protobuf_python", + protoc = "@com_google_protobuf//:protoc", + srcs_version = "PY2AND3", + deps = [ + "@com_google_protobuf//:protobuf_python", + ], +) diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 0d28a079..d92f9b13 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -14,6 +14,7 @@ enum DataType { DT_HALF = 3; DT_INT32 = 4; DT_FLOAT16 = 5; + DT_BFLOAT16 = 6; } enum MemoryType { @@ -76,6 +77,7 @@ message OperatorDef { repeated DataType output_type = 8; repeated QuantizeActivationInfo quantize_info = 9; + // for mace it is mem_id, for micro, it is mem_offset repeated int32 mem_id = 10; // for hexagon mace-nnlib diff --git a/mace/proto/micro_mem.proto b/mace/proto/micro_mem.proto new file mode 100644 index 00000000..33fe1153 --- /dev/null +++ b/mace/proto/micro_mem.proto @@ -0,0 +1,23 @@ +syntax = "proto2"; + +package micro; + +message OutputShape { + repeated int64 dims = 1; +} + +message OpContext { + optional int32 op_idx = 1; + // The input info of downstream operator is the output info of upstream + // operator, so there is no output info defined here + repeated uint32 input_infos = 2; + repeated OutputShape output_resize_shapes = 3; +} + +message Graph { + repeated OpContext op_contexts = 1; + repeated uint32 input_op_idxs = 2; + // The output info of the last operator, which is not recorded in opcontext, + // is the output of graph + repeated uint32 output_infos = 3; +} diff --git a/micro/BUILD.bazel b/micro/BUILD.bazel new file mode 100644 index 00000000..0af62156 --- /dev/null +++ b/micro/BUILD.bazel @@ -0,0 +1,7 @@ +config_setting( + name = "hexagon_enabled", + define_values = { + "hexagon": "true", + }, + visibility = ["//visibility:public"], +) diff --git a/micro/base/BUILD.bazel b/micro/base/BUILD.bazel new file mode 100644 index 00000000..5f183c66 --- /dev/null +++ b/micro/base/BUILD.bazel @@ -0,0 +1,39 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "base_hdrs", + hdrs = glob([ + 
"*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/include", + "//micro/port", + ], +) + +cc_library( + name = "base", + srcs = glob( + [ + "*.cc", + ], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "base_hdrs", + "//micro/port", + ], +) diff --git a/micro/base/logger.cc b/micro/base/logger.cc new file mode 100644 index 00000000..4663e574 --- /dev/null +++ b/micro/base/logger.cc @@ -0,0 +1,149 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + +#include "micro/base/logger.h" + +#include "micro/base/value_to_str.h" +#include "micro/port/api.h" + +namespace micro { +namespace base { + +namespace { +const int32_t kInt64ValueBufferLength = 21; +const int32_t kInt32ValueBufferLength = 12; +const int32_t kInt16ValueBufferLength = 6; +const int32_t kInt8ValueBufferLength = 4; +const int32_t kFloatValueBufferLength = 21; + +inline bool IsValidLogLevel(const LogLevel level) { + return level >= CLEAN && level < INVALID_MAX; +} + +char LogLevelToShortStr(LogLevel level) { + if (!IsValidLogLevel(level)) { + level = INFO; + } + + return "CIWEF"[static_cast(level)]; +} + +} // namespace + +Logger::Logger(const char *fname, uint32_t line, + LogLevel severity) : severity_(severity) { + if (severity == CLEAN) { + return; + } + char buffer[15] = {0}; + char *end = buffer + 15; + buffer[0] = LogLevelToShortStr(severity); + buffer[1] = ' '; + micro::port::api::DebugLog(buffer); + + micro::port::api::DebugLog(fname); + + buffer[0] = ':'; + ToString("] ", ToString(line, buffer + 1, end), end); + micro::port::api::DebugLog(buffer); +} + +Logger::~Logger() { + micro::port::api::DebugLog("\n"); + if (severity_ == FATAL) { + micro::port::api::Abort(); + } +} + +const Logger &Logger::operator<<(const char *str) const { + micro::port::api::DebugLog(str); + return *this; +} + +const Logger &Logger::operator<<(const char c) const { + char buffer[2] = {0}; + buffer[0] = c; + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const float value) const { + char buffer[kFloatValueBufferLength] = {0}; + ToString(value, buffer, buffer + kFloatValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const int64_t value) const { + char buffer[kInt64ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt64ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const int32_t value) const { + char buffer[kInt32ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt32ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const uint32_t value) const { + char buffer[kInt32ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt32ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const 
Logger &Logger::operator<<(const int16_t value) const { + char buffer[kInt16ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt16ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const uint16_t value) const { + char buffer[kInt16ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt16ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const int8_t value) const { + char buffer[kInt8ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt8ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const uint8_t value) const { + char buffer[kInt8ValueBufferLength] = {0}; + ToString(value, buffer, buffer + kInt8ValueBufferLength); + micro::port::api::DebugLog(buffer); + return *this; +} + +const Logger &Logger::operator<<(const bool value) const { + if (value) { + micro::port::api::DebugLog("true"); + } else { + micro::port::api::DebugLog("false"); + } + return *this; +} + +} // namespace base +} // namespace micro diff --git a/micro/base/logger.h b/micro/base/logger.h new file mode 100644 index 00000000..a5b50947 --- /dev/null +++ b/micro/base/logger.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_BASE_LOGGER_H_ +#define MICRO_BASE_LOGGER_H_ + +#include + +namespace micro { + +enum LogLevel { + CLEAN = 0, + INFO = 1, + WARNING = 2, + ERROR = 3, + FATAL = 4, + INVALID_MAX, +}; + +namespace base { + +class Logger { + public: + Logger(const char *fname, uint32_t line, LogLevel severity); + ~Logger(); + + const Logger &operator<<(const char *str) const; + const Logger &operator<<(const char c) const; + const Logger &operator<<(const float value) const; + const Logger &operator<<(const int64_t value) const; + const Logger &operator<<(const int32_t value) const; + const Logger &operator<<(const uint32_t value) const; + const Logger &operator<<(const int16_t value) const; + const Logger &operator<<(const uint16_t value) const; + const Logger &operator<<(const int8_t value) const; + const Logger &operator<<(const uint8_t value) const; + const Logger &operator<<(const bool value) const; + + private: + LogLevel severity_; +}; + +} // namespace base +} // namespace micro + +#endif // MICRO_BASE_LOGGER_H_ diff --git a/micro/base/logging.h b/micro/base/logging.h new file mode 100644 index 00000000..a0e806ae --- /dev/null +++ b/micro/base/logging.h @@ -0,0 +1,84 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_BASE_LOGGING_H_ +#define MICRO_BASE_LOGGING_H_ + +#include + +#include "micro/base/logger.h" +#include "micro/include/port/define.h" + +namespace micro { +namespace log { + +#define LOG(severity) \ + micro::base::Logger(__FILE__, __LINE__, micro::severity) + +#ifndef NDEBUG +#define LOG1(severity, value) LOG(severity) << value +#define LOG2(severity, value1, value2) LOG(severity) << value1 << value2 +#define LOG3(severity, value1, value2, value3) \ + LOG(severity) << value1 << value2 << value3 +#define LOG4(severity, value1, value2, value3, value4) \ + LOG(severity) << value1 << value2 << value3 << value4 +#define LOG5(severity, value1, value2, value3, value4, value5) \ + LOG(severity) << value1 << value2 << value3 << value4 << value5 +#else +#define LOG1(severity, value) +#define LOG2(severity, value1, value2) +#define LOG3(severity, value1, value2, value3) +#define LOG4(severity, value1, value2, value3, value4) +#define LOG5(severity, value1, value2, value3, value4, value5) +#endif // NDEBUG + +#ifndef NDEBUG +#define MACE_ASSERT(condition) \ + if (!(condition)) LOG(FATAL) << "Assert failed: "#condition // NOLINT +#define MACE_ASSERT1(condition, str) \ + if (!(condition)) LOG(FATAL) << "Assert failed: "#condition " " << str // NOLINT +#define MACE_ASSERT2(condition, str1, str2) \ + if (!(condition)) LOG(FATAL) << "Assert failed: "#condition " " << str1 << str2 // NOLINT +#else +#define MACE_ASSERT(condition) +#define MACE_ASSERT1(condition, string) +#define MACE_ASSERT2(condition, string1, string2) +#endif // NDEBUG + +#define MACE_NOT_IMPLEMENTED MACE_ASSERT1(false, "not implemented") + +#define MACE_CHECK_SUCCESS(stmt) \ + { \ + MaceStatus status = (stmt); \ + if (status != MACE_SUCCESS) { \ + LOG(FATAL) << #stmt << " failed with error: " \ + << status; \ + } \ + } + +#define MACE_RETURN_IF_ERROR(stmt) \ + { \ + MaceStatus status = (stmt); \ + if (status != MACE_SUCCESS) { \ + LOG(INFO) << static_cast(stmt) \ + << " failed with error: " \ + << static_cast(status); \ + return status; \ + } \ + } + +} // namespace log +} // namespace micro + +#endif // MICRO_BASE_LOGGING_H_ diff --git a/micro/base/serialize.cc b/micro/base/serialize.cc new file mode 100644 index 00000000..039ed3ab --- /dev/null +++ b/micro/base/serialize.cc @@ -0,0 +1,54 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/base/serialize.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { + +#ifdef MACE_WRITE_MAGIC +SerialUint32 Serialize::GetMagic() const { + return magic_; +} + +SerialUint32 Serialize::Magic(const char *bytes4) const { + MACE_ASSERT1(micro::base::strlen(bytes4) >= 4, "The magic bytes must >= 4."); + SerialUint32 magic = 0; + for (int32_t i = 0; i < 32 && (*bytes4) != '\0'; i += 8, ++bytes4) { + magic += (*bytes4) << i; + } + return magic; +} + +MaceStatus Serialize::MagicToString(SerialUint32 magic, + char (&array)[5]) const { + char *buffer = array; + for (int32_t i = 0; i <32; i += 8, ++buffer) { + *buffer = (magic >> i) & 0x000000ff; + } + *buffer = '\0'; + return MACE_SUCCESS; +} +#endif // MACE_WRITE_MAGIC + +void Serialize::Uint2OpIOInfo(const OpIOInfo *info) const { + OpIOInfo *io_info = const_cast(info); + uint32_t info_data = *(reinterpret_cast(io_info)); + io_info->op_def_idx_ = (info_data & 0xffff0000) >> 16; + io_info->output_idx_ = (info_data & 0x0000ffff); +} + +} // namespace micro diff --git a/micro/base/serialize.h b/micro/base/serialize.h new file mode 100644 index 00000000..932aecd4 --- /dev/null +++ b/micro/base/serialize.h @@ -0,0 +1,64 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_BASE_SERIALIZE_H_ +#define MICRO_BASE_SERIALIZE_H_ + +#include + +#include "micro/base/serialize_type.h" +#include "micro/include/public/micro.h" + +namespace micro { + +#ifdef MACE_WRITE_MAGIC +#ifndef MACE_DEFINE_HARD_CODE_MAGIC +#define MACE_DEFINE_HARD_CODE_MAGIC(CLASS_NAME) \ +SerialUint32 GetHardCodeMagic() const { \ + return Magic(#CLASS_NAME); \ +} +#endif // MACE_DEFINE_HARD_CODE_MAGIC +#else +#ifndef MACE_DEFINE_HARD_CODE_MAGIC +#define MACE_DEFINE_HARD_CODE_MAGIC(CLASS_NAME) +#endif // MACE_DEFINE_HARD_CODE_MAGIC +#endif // MACE_WRITE_MAGIC + +// We describe a tensor as an output tensor, but it can also +// be used to represent an input tensor. +struct OpIOInfo { + uint16_t op_def_idx_; + uint16_t output_idx_; +}; + +class Serialize { +#ifdef MACE_WRITE_MAGIC + public: + SerialUint32 GetMagic() const; + MaceStatus MagicToString(SerialUint32 magic, char (&array)[5]) const; + + protected: + SerialUint32 magic_; + + protected: + SerialUint32 Magic(const char *bytes4) const; +#endif // MACE_WRITE_MAGIC + + public: + void Uint2OpIOInfo(const OpIOInfo *output_info) const; +}; + +} // namespace micro + +#endif // MICRO_BASE_SERIALIZE_H_ diff --git a/micro/base/serialize_type.h b/micro/base/serialize_type.h new file mode 100644 index 00000000..3d267428 --- /dev/null +++ b/micro/base/serialize_type.h @@ -0,0 +1,195 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_BASE_SERIALIZE_TYPE_H_ +#define MICRO_BASE_SERIALIZE_TYPE_H_ + +#include + +#include "micro/include/public/micro.h" + +namespace micro { + +#ifdef MACE_OFFSET_USE_16 +typedef uint16_t offset_size_t; +#else +typedef uint32_t offset_size_t; +#endif // MACE_OFFSET_USE_16 + +template +struct SerialArray { + offset_size_t size_; + offset_size_t offset_; + SerialArray() : size_(0), offset_(0) {} +}; + +struct SerialString { + offset_size_t packed_length_; + offset_size_t offset_; + SerialString() : packed_length_(0), offset_(0) {} +}; + +struct SerialBytes { + offset_size_t packed_length_; + offset_size_t offset_; + SerialBytes() : packed_length_(0), offset_(0) {} +}; + +typedef float SerialFloat; +typedef int32_t SerialInt32; +typedef uint32_t SerialUint32; +typedef uint32_t SerialBool; +typedef int32_t SerialInt16; +typedef uint32_t SerialUint16; +typedef int32_t SerialInt8; +typedef uint32_t SerialUint8; + +#ifndef MACE_DECLARE_OBJECT_FUNC +#define MACE_DECLARE_OBJECT_FUNC(T, OBJECT_NAME) \ + T OBJECT_NAME() const; +#endif // MACE_DECLARE_OBJECT_FUNC + +#ifndef MACE_DEFINE_OBJECT_FUNC +#define MACE_DEFINE_OBJECT_FUNC(CLASS_NAME, T, OBJECT_NAME) \ + T CLASS_NAME::OBJECT_NAME() const { \ + return OBJECT_NAME##_; \ + } +#endif // MACE_DEFINE_OBJECT_FUNC + +#ifndef MACE_MACE_DECLARE_PTR_FUNC +#define MACE_DECLARE_PTR_FUNC(T, OBJECT_NAME) \ + const T *OBJECT_NAME() const; +#endif // MACE_DECLARE_PTR_FUNC + +#ifndef MACE_DEFINE_PTR_FUNC +#define MACE_DEFINE_PTR_FUNC(CLASS_NAME, T, OBJECT_NAME) \ + const T *CLASS_NAME::OBJECT_NAME() const { \ + return &OBJECT_NAME##_; \ + } +#endif // MACE_DEFINE_PTR_FUNC + +#ifndef MACE_DECLARE_ARRAY_FUNC +#define MACE_DECLARE_ARRAY_FUNC(T, OBJECT_NAME) \ + T OBJECT_NAME(uint32_t index) const; \ + uint32_t OBJECT_NAME##_size() const +#endif // MACE_DECLARE_ARRAY_FUNC + +#ifndef MACE_DECLARE_ARRAY_BASE_PTR_FUNC +#define MACE_DECLARE_ARRAY_BASE_PTR_FUNC(T, OBJECT_NAME) \ + const T * OBJECT_NAME() const +#endif // MACE_DECLARE_ARRAY_BASE_PTR_FUNC + +#ifndef MACE_DEFINE_ARRAY_BASE_PTR_FUNC +#define MACE_DEFINE_ARRAY_BASE_PTR_FUNC( \ + CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \ + const T *CLASS_NAME::OBJECT_NAME() const { \ + const T *array = reinterpret_cast( \ + reinterpret_cast(this) + ARRAY_NAME.offset_); \ + return array; \ + } +#endif // MACE_DEFINE_ARRAY_BASE_PTR_FUNC + +#ifndef MACE_DEFINE_ARRAY_FUNC +#define MACE_DEFINE_ARRAY_FUNC(CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \ + T CLASS_NAME::OBJECT_NAME(uint32_t index) const { \ + const T *array = reinterpret_cast( \ + reinterpret_cast(this) + ARRAY_NAME.offset_); \ + return *(array + index); \ + } \ + uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \ + return ARRAY_NAME.size_; \ + } +#endif // MACE_DEFINE_ARRAY_FUNC + +#ifndef MACE_DECLARE_PTR_ARRAY_FUNC +#define MACE_DECLARE_PTR_ARRAY_FUNC(T, OBJECT_NAME) \ + const T *OBJECT_NAME(uint32_t index) const; \ + uint32_t OBJECT_NAME##_size() const +#endif // MACE_DECLARE_PTR_ARRAY_FUNC + +#ifndef MACE_DEFINE_PTR_ARRAY_FUNC +#define MACE_DEFINE_PTR_ARRAY_FUNC(CLASS_NAME, T, OBJECT_NAME, ARRAY_NAME) \ + const T 
*CLASS_NAME::OBJECT_NAME(uint32_t index) const { \ + const T *array = reinterpret_cast( \ + reinterpret_cast(this) + ARRAY_NAME.offset_); \ + return (array + index); \ + } \ + \ + uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \ + return ARRAY_NAME.size_; \ + } +#endif // MACE_DEFINE_PTR_ARRAY_FUNC + +#ifndef MACE_DECLARE_STRING_FUNC +#define MACE_DECLARE_STRING_FUNC(OBJECT_NAME) \ + const char *OBJECT_NAME() const; +#endif // MACE_DECLARE_STRING_FUNC + +#ifndef MACE_DEFINE_STRING_FUNC +#define MACE_DEFINE_STRING_FUNC(CLASS_NAME, OBJECT_NAME, STRING_NAME) \ + const char *CLASS_NAME::OBJECT_NAME() const { \ + if (STRING_NAME.packed_length_ == 0) { \ + return NULL; \ + } else { \ + return reinterpret_cast(this) + STRING_NAME.offset_; \ + } \ + } +#endif // MACE_DEFINE_STRING_FUNC + +#ifndef MACE_DECLARE_BYTES_FUNC +#define MACE_DECLARE_BYTES_FUNC(OBJECT_NAME) \ + const uint8_t *OBJECT_NAME() const; \ + uint32_t OBJECT_NAME##_size() const +#endif // MACE_DECLARE_BYTES_FUNC + +#ifndef MACE_DEFINE_BYTES_FUNC +#define MACE_DEFINE_BYTES_FUNC(CLASS_NAME, OBJECT_NAME, BYTES_NAME) \ + const uint8_t *CLASS_NAME::OBJECT_NAME() const { \ + if (BYTES_NAME.packed_length_ == 0) { \ + return NULL; \ + } else { \ + return reinterpret_cast(this) + BYTES_NAME.offset_; \ + } \ + } \ + \ + uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \ + return BYTES_NAME.packed_length_; \ + } +#endif // MACE_DEFINE_BYTES_FUNC + +#ifndef MACE_DECLARE_STRING_ARRAY_FUNC +#define MACE_DECLARE_STRING_ARRAY_FUNC(OBJECT_NAME) \ + const char *OBJECT_NAME(uint32_t index) const; \ + uint32_t OBJECT_NAME##_size() const +#endif + +#ifndef MACE_DEFINE_STRING_ARRAY_FUNC +#define MACE_DEFINE_STRING_ARRAY_FUNC(CLASS_NAME, OBJECT_NAME, ARRAY_NAME) \ + const char *CLASS_NAME::OBJECT_NAME(uint32_t index) const { \ + const SerialString *array = reinterpret_cast( \ + reinterpret_cast(this) + ARRAY_NAME.offset_); \ + const SerialString *serial_str = array + index; \ + const char *str = reinterpret_cast(serial_str) + \ + serial_str->offset_; \ + return str; \ + } \ + \ + uint32_t CLASS_NAME::OBJECT_NAME##_size() const { \ + return ARRAY_NAME.size_; \ + } +#endif // MACE_DEFINE_STRING_ARRAY_FUNC + +} // namespace micro + +#endif // MICRO_BASE_SERIALIZE_TYPE_H_ diff --git a/micro/base/types.h b/micro/base/types.h new file mode 100644 index 00000000..6de264b4 --- /dev/null +++ b/micro/base/types.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_BASE_TYPES_H_ +#define MICRO_BASE_TYPES_H_ + +#include "micro/include/public/micro.h" +#include "micro/include/utils/bfloat16.h" + +namespace micro { + +#ifdef MACE_ENABLE_BFLOAT16 +typedef BFloat16 mifloat; +#else +typedef float mifloat; +#endif // MACE_ENABLE_BFLOAT16 + +template +struct DataTypeToEnum; + +template +struct EnumToDataType; + +#ifndef MACE_MAPPING_DATA_TYPE_AND_ENUM +#define MACE_MAPPING_DATA_TYPE_AND_ENUM(DATA_TYPE, ENUM_VALUE) \ + template <> \ + struct DataTypeToEnum { \ + static DataType v() { return ENUM_VALUE; } \ + static const DataType value = ENUM_VALUE; \ + }; \ + template <> \ + struct EnumToDataType { \ + typedef DATA_TYPE Type; \ + }; +#endif // MACE_MAPPING_DATA_TYPE_AND_ENUM + +MACE_MAPPING_DATA_TYPE_AND_ENUM(float, DT_FLOAT); +MACE_MAPPING_DATA_TYPE_AND_ENUM(uint8_t, DT_UINT8); +MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32); +#ifdef MACE_ENABLE_BFLOAT16 +MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16); +#endif + +} // namespace micro + +#endif // MICRO_BASE_TYPES_H_ diff --git a/micro/base/utils.cc b/micro/base/utils.cc new file mode 100644 index 00000000..305e461f --- /dev/null +++ b/micro/base/utils.cc @@ -0,0 +1,109 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "micro/base/utils.h" + +#include + +#include "micro/base/logging.h" + +namespace micro { +namespace base { + +uint32_t strlen(const char *str) { + MACE_ASSERT1(str != NULL, "str can not be NULL."); + uint32_t length = 0; + while (*str++ != '\0') { + ++length; + } + return length; +} + +int32_t strcmp(const char *str1, const char *str2) { + MACE_ASSERT1(str1 != NULL && str2 != NULL, + "strcmp str can not be NULL."); + while (*str1 == *str2) { + if (*str1 == '\0') { + return 0; + } + ++str1; + ++str2; + } + return (*str1) - (*str2); +} + +void memcpy(void *dst, const void *src, uint32_t bytes) { + MACE_ASSERT1(dst != NULL && src != NULL && bytes > 0, + "Invalid params."); + uint8_t *dst_mem = static_cast(dst); + const uint8_t *src_mem = static_cast(src); + while (bytes-- > 0) { + *dst_mem++ = *src_mem++; + } +} + +int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims) { + return accumulate_multi(dims, 0, dim_size); +} + +float sqrt(float x) { + return ::sqrt(x); +} + +int32_t ceil(float f) { + int32_t i = (int32_t) f; + return (f == static_cast(i)) ? 
i : i + 1; +} + +int32_t floor(float f) { + return ::floor(f); +} + +float fabs(float x) { + if (x < 0.0f) { + return -x; + } else if (x > 0.0f) { + return x; + } else { + return 0.0f; + } +} + +float lowest() { + return -3.402823466e+38F; +} + +float highest() { + return 3.402823466e+38F; +} + +float tanh(float x) { + return ::tanh(x); +} + +float exp(float x) { + return ::exp(x); +} + +float pow(float x, float y) { + return ::pow(x, y); +} + +float log(float x) { + return ::log(x); +} + +} // namespace base +} // namespace micro diff --git a/micro/base/utils.h b/micro/base/utils.h new file mode 100644 index 00000000..56eb955e --- /dev/null +++ b/micro/base/utils.h @@ -0,0 +1,90 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_BASE_UTILS_H_ +#define MICRO_BASE_UTILS_H_ + +#include + +#include "micro/base/logging.h" + +namespace micro { +namespace base { + +uint32_t strlen(const char *str); +int32_t strcmp(const char *str1, const char *str2); +void memcpy(void *dst, const void *src, uint32_t bytes); +int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims); +float sqrt(float x); +int32_t ceil(float f); +int32_t floor(float f); +float fabs(float x); +float lowest(); +float highest(); +float tanh(float x); +float exp(float x); +float pow(float x, float y); +float log(float x); + +template +void memset(T *src, T value, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + src[i] = value; + } +} + +template +T accumulate_multi(const T *array, uint32_t array_start, uint32_t array_end) { + MACE_ASSERT(array_start >= 0 && array_start <= array_end); + if (array == NULL || array_start == array_end) { + return 1; + } + T total = array[array_start]; + for (uint32_t i = array_start + 1; i < array_end; ++i) { + total *= array[i]; + } + return total; +} + +template +T abs(T x) { + return x > 0 ? x : -x; +} + +template +T max(T a, T b) { + return a > b ? a : b; +} + +template +T min(T a, T b) { + return a < b ? a : b; +} + +template +void swap(T *a, T *b) { // NOLINT + T c = *a; + *a = *b; + *b = c; +} + +template +T clamp(T in, T low, T high) { + return max(low, min(in, high)); // NOLINT +} + +} // namespace base +} // namespace micro + +#endif // MICRO_BASE_UTILS_H_ diff --git a/micro/base/value_to_str.cc b/micro/base/value_to_str.cc new file mode 100644 index 00000000..3312d83d --- /dev/null +++ b/micro/base/value_to_str.cc @@ -0,0 +1,91 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "micro/base/value_to_str.h" + +namespace micro { +namespace base { + +#ifndef MACE_SIGNED_TO_STRING +#define MACE_SIGNED_TO_STRING(T, UNSIGNED_T) \ +template<> \ +char *ToString(T value, char *buffer, char *end) { \ + if (value < 0) { \ + value = -value; \ + *buffer++ = '-'; \ + } \ + return ToString(static_cast(value), buffer, end); \ +} +#endif // MACE_SIGNED_TO_STRING + + +void ReverseInplace(char *start, char *end) { + end--; + while (start < end) { + char tmp = *start; + *start++ = *end; + *end-- = tmp; + } +} + +MACE_SIGNED_TO_STRING(int64_t, uint64_t) + +MACE_SIGNED_TO_STRING(int32_t, uint32_t) + +MACE_SIGNED_TO_STRING(int16_t, uint16_t) + +MACE_SIGNED_TO_STRING(int8_t, uint8_t) + +template<> +char *ToString(const char *str, char *buffer, char *end) { + end--; + while (*str != '\0' && buffer < end) { + *buffer++ = *str++; + } + *buffer = '\0'; + return buffer; +} + +template<> +char *ToString(float value, char *buffer, char *end) { + if (value <= -1e-8) { + *buffer++ = '-'; + } + int32_t int_part = (int32_t) value; + buffer = ToString(int_part, buffer, end); + + float deci_part = value - int_part; + if (deci_part < 1e-8 && deci_part > -1e-8) { + return buffer; + } + if (deci_part < 0.0) { + deci_part = -deci_part; + } + + end--; + *buffer++ = '.'; + do { + deci_part *= 10; + int32_t remainder = (int32_t) deci_part; + *buffer++ = '0' + remainder; + deci_part -= remainder; + } while (deci_part > 0 && buffer < end); + + *buffer = '\0'; + return buffer; +} + +} // namespace base +} // namespace micro diff --git a/micro/base/value_to_str.h b/micro/base/value_to_str.h new file mode 100644 index 00000000..91ff6578 --- /dev/null +++ b/micro/base/value_to_str.h @@ -0,0 +1,61 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
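+
+// Minimal formatting helpers used instead of the C library's snprintf. Each
+// ToString overload writes the textual form of its value into [buffer, end),
+// NUL-terminates, and returns a pointer to that terminator, so calls can be
+// chained (illustrative sketch with a hypothetical local buffer):
+//   char buf[32];
+//   char *pos = ToString(int32_t(-42), buf, buf + sizeof(buf));
+//   pos = ToString(" items", pos, buf + sizeof(buf));   // buf == "-42 items"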
+ +#ifndef MICRO_BASE_VALUE_TO_STR_H_ +#define MICRO_BASE_VALUE_TO_STR_H_ + +#include + +namespace micro { +namespace base { + +void ReverseInplace(char *start, char *end); + +// for uint64_t/uint32_t/uint16_t/uint8_t +template +char *ToString(T value, char *buffer, char *end) { + char *start = buffer; + end--; + do { + *buffer++ = '0' + (value % 10); + value /= 10; + } while (value > 0 && buffer < end); + ReverseInplace(start, buffer); + *buffer = '\0'; + + return buffer; +} + +template<> +char *ToString(int64_t value, char *buffer, char *end); + +template<> +char *ToString(int32_t value, char *buffer, char *end); + +template<> +char *ToString(int16_t value, char *buffer, char *end); + +template<> +char *ToString(int8_t value, char *buffer, char *end); + +template<> +char *ToString(const char *str, char *buffer, char *end); + +template<> +char *ToString(float value, char *buffer, char *end); + +} // namespace base +} // namespace micro + +#endif // MICRO_BASE_VALUE_TO_STR_H_ diff --git a/micro/codegen/BUILD.bazel b/micro/codegen/BUILD.bazel new file mode 100644 index 00000000..78b41ef2 --- /dev/null +++ b/micro/codegen/BUILD.bazel @@ -0,0 +1,82 @@ +# Description: +# Generated model and runtime code. +# +package( + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "generated_models", + srcs = glob(["models/**/*.cc"]), + hdrs = glob(["models/**/*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/framework", + "//micro/include", + "//micro/model", + "//micro/ops", + ], +) + +cc_library( + name = "micro_engine_c", + srcs = glob(["micro/codegen/engines/**/micro_engine_c_interface.cc"]), + hdrs = glob(["micro/codegen/engines/**/micro_engine_c_interface.cc"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + ":micro_engine", + ], + alwayslink = 1, +) + +cc_library( + name = "micro_engine", + srcs = glob( + ["engines/**/*.cc"], + exclude = ["micro/codegen/engines/**/micro_engine_c_interface.cc"], + ), + hdrs = glob( + [ + "engines/**/*.h", + ], + exclude = ["micro/codegen/engines/**/micro_engine_c_interface.cc"], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "generated_models", + "//micro/framework", + "//micro/model", + "//micro/ops", + ], + alwayslink = 1, +) + +cc_binary( + name = "libmicro.so", + linkshared = 1, + linkstatic = 1, + deps = [ + ":micro_engine", + ], +) + +cc_binary( + name = "libmicro.lo", + linkshared = False, + linkstatic = True, + deps = [ + ":micro_engine", + ], +) diff --git a/micro/framework/BUILD.bazel b/micro/framework/BUILD.bazel new file mode 100644 index 00000000..c8b3315d --- /dev/null +++ b/micro/framework/BUILD.bazel @@ -0,0 +1,40 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "framework", + srcs = glob(["*.cc"]), + hdrs = glob(["*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/base", + "//micro/include", + "//micro/model", + ], +) + +cc_library( + name = "framework_for_optest", + srcs = glob( + ["*.cc"], + exclude = ["operator.cc"], + ), + hdrs = glob(["*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/base", + "//micro/include", + "//micro/model", + ], +) diff --git a/micro/framework/graph.cc b/micro/framework/graph.cc new file mode 100644 index 00000000..560aaeec --- /dev/null +++ 
b/micro/framework/graph.cc @@ -0,0 +1,108 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/framework/graph.h" + +#include "micro/base/logging.h" +#include "micro/base/serialize.h" +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/include/public/micro.h" +#include "micro/model/net_def.h" + +namespace micro { +namespace framework { + +MACE_DEFINE_PTR_ARRAY_FUNC(Graph, OpContext, op_context, op_contexts_) +MACE_DEFINE_ARRAY_FUNC(Graph, uint32_t, input_op_idx, input_op_idxs_); +MACE_DEFINE_PTR_ARRAY_FUNC(Graph, OpIOInfo, output_info, output_infos_); + +MaceStatus Graph::Init(MaceMicroEngineConfig *engine_config) { + MACE_ASSERT(engine_config->net_def_->op_size() == op_context_size()); + + uint32_t output_info_size = this->output_info_size(); + for (uint32_t i = 0; i < output_info_size; ++i) { + Uint2OpIOInfo(this->output_info(i)); + } + + uint32_t op_size = engine_config->net_def_->op_size(); + for (uint32_t i = 0; i < op_size; ++i) { + OpContext *op_ctx = const_cast(op_context(i)); + MACE_RETURN_IF_ERROR(op_ctx->Init( + engine_config, engine_config->net_def_->op(i))); + } + + return MACE_SUCCESS; +} + +MaceStatus Graph::RegisterInputData(MaceMicroEngineConfig *engine_config, + uint32_t idx, + const void *input_buffer, + const int32_t *input_dims) { + engine_config->input_buffers_[idx] = input_buffer; + engine_config->input_shapes_[idx] = input_dims; + + // update the op's input buffers + uint32_t op_idx = input_op_idx(idx); + framework::Operator *input_op = engine_config->op_array_[op_idx]; + return input_op->OnInit(); +} + +MaceStatus Graph::Run(MaceMicroEngineConfig *engine_config) { + uint32_t op_size = engine_config->net_def_->op_size(); + for (uint32_t i = 0; i < op_size; ++i) { + OpContext *op_ctx = const_cast(op_context(i)); + MACE_RETURN_IF_ERROR(op_ctx->Run(engine_config)); + } + + return MACE_SUCCESS; +} + +MaceStatus Graph::GetOutputData(MaceMicroEngineConfig *engine_config, + const uint32_t idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size) { + MACE_ASSERT(idx < output_info_size()); + + const OpIOInfo *o_info = output_info(idx); + return GetOpOutputData(engine_config, o_info->op_def_idx_, + o_info->output_idx_, output_data, + output_dims, output_dim_size); +} + +MaceStatus Graph::GetOpOutputData(MaceMicroEngineConfig *engine_config, + const uint32_t op_def_idx, + const uint32_t output_idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size) { + MACE_ASSERT(engine_config != NULL); + MACE_ASSERT(output_data != NULL); + MACE_ASSERT(output_dims != NULL); + MACE_ASSERT(output_dim_size != NULL); + + const model::OperatorDef *op_def = engine_config->net_def_->op(op_def_idx); + *output_data = engine_config->tensor_mem_ + op_def->mem_offset(output_idx); + + const model::OutputShape *output_shape = + op_context(op_def_idx)->output_resize_shape(output_idx); + *output_dims = output_shape->dim(); + 
*output_dim_size = output_shape->dim_size(); + + return MACE_SUCCESS; +} + +} // namespace framework +} // namespace micro diff --git a/micro/framework/graph.h b/micro/framework/graph.h new file mode 100644 index 00000000..5153361b --- /dev/null +++ b/micro/framework/graph.h @@ -0,0 +1,63 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MICRO_FRAMEWORK_GRAPH_H_ +#define MICRO_FRAMEWORK_GRAPH_H_ + +#include "micro/base/serialize.h" +#include "micro/framework/op_context.h" + +namespace micro { + +struct MaceMicroEngineConfig; + +namespace framework { + +class Graph : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(Graph) + + MACE_DECLARE_PTR_ARRAY_FUNC(OpContext, op_context); + MACE_DECLARE_ARRAY_FUNC(uint32_t, input_op_idx); + MACE_DECLARE_PTR_ARRAY_FUNC(OpIOInfo, output_info); + + MaceStatus Init(MaceMicroEngineConfig *engine_config); + MaceStatus RegisterInputData(MaceMicroEngineConfig *engine_config, + uint32_t idx, + const void *input_buffer, + const int32_t *input_dims); + MaceStatus Run(MaceMicroEngineConfig *engine_config); + MaceStatus GetOutputData(MaceMicroEngineConfig *engine_config, + const uint32_t idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size); + MaceStatus GetOpOutputData(MaceMicroEngineConfig *engine_config, + const uint32_t op_def_idx, + const uint32_t output_idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size); + + protected: + SerialArray op_contexts_; + SerialArray input_op_idxs_; + SerialArray output_infos_; +}; + +} // namespace framework +} // namespace micro + +#endif // MICRO_FRAMEWORK_GRAPH_H_ diff --git a/micro/framework/micro_engine.cc b/micro/framework/micro_engine.cc new file mode 100644 index 00000000..4b481fbd --- /dev/null +++ b/micro/framework/micro_engine.cc @@ -0,0 +1,81 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
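+
+// MaceMicroEngine is a thin facade: each method below forwards to the Graph
+// held in MaceMicroEngineConfig. Typical calling sequence (sketch only; the
+// populated MaceMicroEngineConfig comes from generated, model-specific code
+// that is not shown here):
+//   MaceMicroEngine engine;
+//   engine.Init(config);
+//   engine.RegisterInputData(0, input_data, input_dims);
+//   engine.Run();
+//   void *out = NULL;
+//   const int32_t *out_dims = NULL;
+//   uint32_t out_dim_size = 0;
+//   engine.GetOutputData(0, &out, &out_dims, &out_dim_size);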
+ +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/graph.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/include/public/micro.h" +#include "micro/model/net_def.h" +#include "micro/model/operator_def.h" +#include "micro/port/api.h" + +namespace micro { +MaceStatus MaceMicroEngine::Init(MaceMicroEngineConfig *engine_config) { + MACE_ASSERT(engine_config != NULL && engine_config->net_def_ != NULL + && engine_config->model_data_ != NULL + && engine_config->graph_ != NULL + && engine_config->op_array_ != NULL + && engine_config->tensor_mem_ != NULL); + engine_config_ = engine_config; + + MACE_RETURN_IF_ERROR(engine_config_->graph_->Init(engine_config_)); + + return MACE_SUCCESS; +} + +MaceStatus MaceMicroEngine::RegisterInputData(uint32_t idx, + const void *input_buffer, + const int32_t *input_dims) { + MACE_ASSERT(idx < engine_config_->net_def_->input_info_size()); + MACE_ASSERT(input_buffer != NULL); + MACE_ASSERT(input_dims != NULL); + + return engine_config_->graph_->RegisterInputData(engine_config_, idx, + input_buffer, input_dims); +} + +MaceStatus MaceMicroEngine::Run() { + return engine_config_->graph_->Run(engine_config_); +} + +MaceStatus MaceMicroEngine::GetOutputData(const uint32_t idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size) { + return engine_config_->graph_->GetOutputData(engine_config_, idx, + output_data, output_dims, + output_dim_size); +} + +MaceStatus MaceMicroEngine::GetOpOutputData(const uint32_t op_def_idx, + const uint32_t output_idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size) { + return engine_config_->graph_->GetOpOutputData(engine_config_, op_def_idx, + output_idx, output_data, + output_dims, output_dim_size); +} + +MaceMicroEngine::MaceMicroEngine(const MaceMicroEngine &) { + MACE_NOT_IMPLEMENTED; +} + +MaceMicroEngine &MaceMicroEngine::operator=(const MaceMicroEngine &) { + MACE_NOT_IMPLEMENTED; + return *this; +} + +} // namespace micro diff --git a/micro/framework/op_context.cc b/micro/framework/op_context.cc new file mode 100644 index 00000000..bec08c15 --- /dev/null +++ b/micro/framework/op_context.cc @@ -0,0 +1,53 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
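+
+// OpContext is the serialized per-operator record of the graph: the index of
+// the operator inside engine_config->op_array_, one OpIOInfo per input
+// (producer op index plus output index), and the resizable output shapes.
+// Init() decodes the packed input infos and forwards to the matching
+// operator's Init(); Run() simply dispatches to that operator.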
+ +#include "micro/framework/op_context.h" + +#include "micro/framework/operator.h" +#include "micro/model/net_def.h" +#include "micro/model/operator_def.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace framework { + +MACE_DEFINE_OBJECT_FUNC(OpContext, uint32_t, op_idx) + +MACE_DEFINE_PTR_ARRAY_FUNC(OpContext, OpIOInfo, input_info, input_infos_) + +MACE_DEFINE_PTR_ARRAY_FUNC(OpContext, model::OutputShape, + output_resize_shape, output_resize_shapes_) + +MaceStatus OpContext::Init(MaceMicroEngineConfig *engine_config, + const model::OperatorDef *op_def) { + // init OpContext + uint32_t input_info_size = this->input_info_size(); + for (uint32_t i = 0; i < input_info_size; ++i) { + Uint2OpIOInfo(this->input_info(i)); + } + + // init Op + uint32_t op_i = op_idx(); + MACE_RETURN_IF_ERROR( + engine_config->op_array_[op_i]->Init(engine_config, this, op_def)); + + return MACE_SUCCESS; +} + +MaceStatus OpContext::Run(MaceMicroEngineConfig *engine_config) { + return engine_config->op_array_[op_idx()]->Run(); +} + +} // namespace framework +} // namespace micro diff --git a/micro/framework/op_context.h b/micro/framework/op_context.h new file mode 100644 index 00000000..fe1e42f3 --- /dev/null +++ b/micro/framework/op_context.h @@ -0,0 +1,51 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_FRAMEWORK_OP_CONTEXT_H_ +#define MICRO_FRAMEWORK_OP_CONTEXT_H_ + +#include "micro/base/serialize.h" +#include "micro/model/operator_def.h" +#include "micro/model/output_shape.h" + +namespace micro { + +struct MaceMicroEngineConfig; + +namespace framework { + +class Operator; + +class OpContext : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(OpContext) + + MACE_DECLARE_OBJECT_FUNC(uint32_t, op_idx); + MACE_DECLARE_PTR_ARRAY_FUNC(OpIOInfo, input_info); + MACE_DECLARE_PTR_ARRAY_FUNC(model::OutputShape, output_resize_shape); + + MaceStatus Init(MaceMicroEngineConfig *engine_config, + const model::OperatorDef *op_def); + MaceStatus Run(MaceMicroEngineConfig *engine_config); + + protected: + SerialUint32 op_idx_; + SerialArray input_infos_; + SerialArray output_resize_shapes_; +}; + +} // namespace framework +} // namespace micro + +#endif // MICRO_FRAMEWORK_OP_CONTEXT_H_ diff --git a/micro/framework/operator.cc b/micro/framework/operator.cc new file mode 100644 index 00000000..7a998f59 --- /dev/null +++ b/micro/framework/operator.cc @@ -0,0 +1,242 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/framework/operator.h" + +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/include/port/define.h" +#include "micro/include/public/micro.h" +#include "micro/model/const_tensor.h" +#include "micro/model/input_output_info.h" +#include "micro/model/net_def.h" +#include "micro/model/operator_def.h" + +namespace micro { +namespace framework { + +namespace { +const uint16_t kIdxConstTensor = 0xffff; +const uint16_t kIdxModelInput = 0xfffe; +} // namespace + +Operator::~Operator() {} + +MaceStatus Operator::Init(MaceMicroEngineConfig *engine_config, + framework::OpContext *op_context, + const model::OperatorDef *op_def) { + engine_config_ = engine_config; + op_context_ = op_context; + op_def_ = op_def; + + MACE_ASSERT1(op_def_->input_size() == op_context_->input_info_size(), + "op_def_'s input dosen't match the op_context_'s"); + MACE_ASSERT1( + op_def_->output_size() == op_context_->output_resize_shape_size(), + "op_def_'s output dosen't match the op_context_'s"); + + return OnInit(); +} + +MaceStatus Operator::Run() { + MACE_NOT_IMPLEMENTED; + return MACE_SUCCESS; +} + +MaceStatus Operator::OnInit() { + return MACE_SUCCESS; +} + +const model::Argument *Operator::GetArgByName(const char *name) const { + MACE_ASSERT(op_def_ != NULL); + for (uint32_t i = 0; i < op_def_->arg_size(); ++i) { + const model::Argument *argument = op_def_->arg(i); + if (base::strcmp(name, argument->name()) == 0) { + return argument; + } + } + return NULL; +} + +uint32_t Operator::GetInputSize() { + return op_def_->input_size(); +} + +const void *Operator::DoGetInputData(uint32_t idx) { + const void *data = NULL; + const OpIOInfo *input_info = op_context_->input_info(idx); + const uint32_t op_def_idx = input_info->op_def_idx_; + if (kIdxConstTensor == op_def_idx) { + const model::ConstTensor *const_tensor = + engine_config_->net_def_->tensor(input_info->output_idx_); + data = engine_config_->model_data_ + const_tensor->offset(); + } else if (kIdxModelInput == op_def_idx) { + data = engine_config_->input_buffers_[input_info->output_idx_]; + } else { + const model::OperatorDef *pre_op_def = + engine_config_->net_def_->op(op_def_idx); + data = engine_config_->tensor_mem_ + + pre_op_def->mem_offset(input_info->output_idx_); + } + + return data; +} + +uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { + uint32_t dim_size = 0; + const OpIOInfo *input_info = op_context_->input_info(idx); + const uint32_t op_def_idx = input_info->op_def_idx_; + if (kIdxConstTensor == op_def_idx) { + const model::ConstTensor *const_tensor = + engine_config_->net_def_->tensor(input_info->output_idx_); + dim_size = const_tensor->dim_size(); + } else if (kIdxModelInput == op_def_idx) { + const model::InputOutputInfo *info = + engine_config_->net_def_->input_info(input_info->output_idx_); + dim_size = info->dim_size(); + } else { + const model::OperatorDef *op_def = engine_config_->net_def_->op(op_def_idx); + const model::OutputShape *output_shape = + op_def->output_shape(input_info->output_idx_); + dim_size = output_shape->dim_size(); + } + return dim_size; +} + +const int32_t *Operator::GetInputShapeDims(uint32_t idx) { + const int32_t *dims = NULL; + const OpIOInfo *input_info = op_context_->input_info(idx); + const uint32_t op_def_idx = input_info->op_def_idx_; + if (kIdxConstTensor == op_def_idx) { + const model::ConstTensor *const_tensor = + 
engine_config_->net_def_->tensor(input_info->output_idx_); + dims = const_tensor->dim(); + } else if (kIdxModelInput == op_def_idx) { + dims = engine_config_->input_shapes_[input_info->output_idx_]; + } else { + const model::OperatorDef *op_def = engine_config_->net_def_->op(op_def_idx); + const model::OutputShape *output_shape = + op_def->output_shape(input_info->output_idx_); + dims = output_shape->dim(); + } + return dims; +} + +uint32_t Operator::GetOutputSize() { + return op_def_->output_size(); +} + +DataType Operator::GetOutputDataType(uint32_t idx) { + return op_def_->output_type(idx); +} + +void *Operator::DoGetOutputData(uint32_t idx) { + return engine_config_->tensor_mem_ + op_def_->mem_offset(idx); +} + +uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { + uint32_t dim_size = 0; + model::OutputShape *output_shape = + const_cast(op_context_->output_resize_shape(idx)); + if (output_shape != NULL) { + dim_size = output_shape->dim_size(); + } + return dim_size; +} + +const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { + const int32_t *dims = NULL; + model::OutputShape *output_shape = + const_cast(op_context_->output_resize_shape(idx)); + if (output_shape != NULL) { + dims = output_shape->dim(); + } + return dims; +} + +MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, + const int32_t *dims) { + model::OutputShape *output_shape = + const_cast(op_context_->output_resize_shape(idx)); +#ifndef NDEBUG + if (op_def_->output_shape(idx)->dim_size() < dim_size + || output_shape->dim_size() < dim_size) { + LOG(FATAL) << "Can not support dynamic dim_size. op_def_dim_size = " + << op_def_->output_shape(idx)->dim_size() + << ", output_shape_dim_size = " << output_shape->dim_size() + << ", dim_size = " << dim_size; + } + int32_t def_output_shape_size = + base::GetShapeSize(output_shape->dim_size(), output_shape->dim()); + int32_t input_shape_size = base::GetShapeSize(dim_size, dims); + if (def_output_shape_size < input_shape_size) { + LOG(INFO) << op_def_->name() << " resize failed, because " + << def_output_shape_size << " < " << input_shape_size; + LOG(INFO) << "input: "; + for (uint32_t i = 0; i < dim_size; ++i) { + LOG(INFO) << dims[i] << ", "; + } + LOG(INFO) << "old output: "; + for (uint32_t i = 0; i < output_shape->dim_size(); ++i) { + LOG(INFO) << output_shape->dim(i) << ", "; + } + MACE_ASSERT(def_output_shape_size >= input_shape_size); + } +#endif // NDEBUG + + if (dim_size > 0) { + base::memcpy(output_shape->mutable_dim(), dims, dim_size * sizeof(int32_t)); + } + return MACE_SUCCESS; +} + +#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC +#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ +template <> \ +T Operator::GetArgByName(const char *name, T default_value) const { \ + const model::Argument *arg = GetArgByName(name); \ + if (arg == NULL) { \ + return default_value; \ + } else { \ + return arg->FUNC(); \ + } \ +} +#endif // MACE_DEFINE_GET_ARG_BY_NAME_FUNC + +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(bool, i) +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(int32_t, i) +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(float, f) + +#ifndef MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC +#define MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(T, FUNC) \ +template <> \ +const T *Operator::GetRepeatArgByName(const char *name, \ + uint32_t *size) const { \ + const model::Argument *arg = GetArgByName(name); \ + if (arg == NULL) { \ + return NULL; \ + } \ + if (size != NULL) { \ + *size = arg->FUNC##_size(); \ + } \ + return arg->FUNC(); \ +} +#endif // MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC + 
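+// The instantiations that follow generate the typed argument getters used by
+// the ops: GetRepeatArgByName<int32_t>/<float>/<uint8_t> read the repeated
+// "ints"/"floats"/"s" fields of the matching Argument, while the
+// GetArgByName<bool>/<int32_t>/<float> specializations above read its scalar
+// fields and fall back to the supplied default when the name is not found.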
+MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(int32_t, ints) +MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(float, floats) +MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(uint8_t, s) + +} // namespace framework +} // namespace micro diff --git a/micro/framework/operator.h b/micro/framework/operator.h new file mode 100644 index 00000000..6269773e --- /dev/null +++ b/micro/framework/operator.h @@ -0,0 +1,111 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_FRAMEWORK_OPERATOR_H_ +#define MICRO_FRAMEWORK_OPERATOR_H_ + +#include "micro/base/logging.h" +#include "micro/base/types.h" +#include "micro/include/public/micro.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { + +struct MaceMicroEngineConfig; + +namespace model { +class Argument; +class OperatorDef; +class OutputShape; +} // namespace model + +namespace ops { +typedef framework::ScratchBuffer ScratchBuffer; +} + +namespace framework { + +#ifndef MACE_OP_INPUT_TAGS +#define MACE_OP_INPUT_TAGS(first_input, ...) \ + enum _InputTags { first_input = 0, __VA_ARGS__ } +#endif // MACE_OP_INPUT_TAGS + +#ifndef MACE_OP_OUTPUT_TAGS +#define MACE_OP_OUTPUT_TAGS(first_input, ...) \ + enum _OutputTags { first_input = 0, __VA_ARGS__ } +#endif // MACE_OP_OUTPUT_TAGS + +class OpContext; + +class Operator { + public: + Operator() {} + // Note: This func should be virtual, but if we make it virtual, + // the operator delete will be needed, which is in c++ runtime library. + // For we don't use the Operator pointer to point sub-classes, the + // virtual ~Operator() is not needed. 
+ ~Operator(); + + MaceStatus Init(MaceMicroEngineConfig *engine_config, + OpContext *op_context, + const model::OperatorDef *op_def); + virtual MaceStatus OnInit(); + virtual MaceStatus Run(); + + template + T GetArgByName(const char *name, T default_value) const; + + template + const T *GetRepeatArgByName(const char *name, + uint32_t *size = NULL) const; + + protected: + uint32_t GetInputSize(); + const void *DoGetInputData(uint32_t idx); + uint32_t GetInputShapeDimSize(uint32_t idx); + const int32_t *GetInputShapeDims(uint32_t idx); + uint32_t GetOutputSize(); + DataType GetOutputDataType(uint32_t idx); + void *DoGetOutputData(uint32_t idx); + uint32_t GetOutputShapeDimSize(uint32_t idx); + const int32_t *GetOutputShapeDims(uint32_t idx); + MaceStatus ResizeOutputShape(uint32_t idx, uint32_t input_dim_size, + const int32_t *input_dims); + MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + + template + const T *GetInputData(uint32_t idx) { + return static_cast(DoGetInputData(idx)); + } + + template + T *GetOutputData(uint32_t idx) { + return static_cast(DoGetOutputData(idx)); + } + + private: + const model::Argument *GetArgByName(const char *name) const; + + protected: + const model::OperatorDef *op_def_; + MaceMicroEngineConfig *engine_config_; + + private: + OpContext *op_context_; +}; + +} // namespace framework +} // namespace micro + +#endif // MICRO_FRAMEWORK_OPERATOR_H_ diff --git a/micro/framework/scratch_buffer.cc b/micro/framework/scratch_buffer.cc new file mode 100644 index 00000000..61fb18f0 --- /dev/null +++ b/micro/framework/scratch_buffer.cc @@ -0,0 +1,62 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/framework/scratch_buffer.h" + +#include "micro/base/logging.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace framework { + +#ifndef NDEBUG +namespace { +int64_t kDetectHandle = -1; +} +#endif + +ScratchBuffer::ScratchBuffer(MaceMicroEngineConfig *engine_config) : + engine_config_(engine_config), offset_(0) { +#ifndef NDEBUG + int64_t cur_handle = reinterpret_cast(engine_config); + MACE_ASSERT1(cur_handle != kDetectHandle, "Detect scratch buffer error."); + kDetectHandle = cur_handle; +#endif +} + +ScratchBuffer::~ScratchBuffer() { +#ifndef NDEBUG + kDetectHandle = -1; +#endif +} + +void *ScratchBuffer::DoGetBuffer(uint32_t size) { + if (size % 4 != 0) { + size = (size + 3) / 4 * 4; + } + if (offset_ + size > engine_config_->scratch_buffer_size_) { + LOG(FATAL) << "The scratch buffer is not enough." 
+ << "offset_: " << offset_ << ", size: " << size + << ", engine_config_->scratch_buffer_size_: " + << engine_config_->scratch_buffer_size_; + } + + void *ptr = engine_config_->scratch_buffer_ + offset_; + offset_ += size; + + return ptr; +} + +} // namespace framework +} // namespace micro diff --git a/micro/framework/scratch_buffer.h b/micro/framework/scratch_buffer.h new file mode 100644 index 00000000..5d54aa60 --- /dev/null +++ b/micro/framework/scratch_buffer.h @@ -0,0 +1,52 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_FRAMEWORK_SCRATCH_BUFFER_H_ +#define MICRO_FRAMEWORK_SCRATCH_BUFFER_H_ + +#include "micro/base/logging.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace framework { + +class ScratchBuffer { + public: + explicit ScratchBuffer(MaceMicroEngineConfig *engine_config); + ~ScratchBuffer(); + + template + T *GetBuffer(int32_t size) { + MACE_ASSERT(size > 0); + return static_cast( + DoGetBuffer(static_cast(size) * sizeof(T))); + } + + template + T *GetBuffer(uint32_t size) { + return static_cast(DoGetBuffer(size * sizeof(T))); + } + + private: + void *DoGetBuffer(uint32_t size); + + private: + const MaceMicroEngineConfig *engine_config_; + uint32_t offset_; +}; + +} // namespace framework +} // namespace micro + +#endif // MICRO_FRAMEWORK_SCRATCH_BUFFER_H_ diff --git a/micro/include/BUILD.bazel b/micro/include/BUILD.bazel new file mode 100644 index 00000000..285e2b5f --- /dev/null +++ b/micro/include/BUILD.bazel @@ -0,0 +1,31 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "include", + hdrs = glob([ + "public/*.h", + "port/*.h", + "utils/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], +) + +cc_library( + name = "public_headers", + hdrs = glob([ + "public/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], +) diff --git a/micro/include/port/define.h b/micro/include/port/define.h new file mode 100644 index 00000000..d8db23d7 --- /dev/null +++ b/micro/include/port/define.h @@ -0,0 +1,33 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
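+
+// Portability fallbacks for freestanding toolchains: MACE_API and
+// MACE_DEPRECATED are intentionally empty here, and __FILE__/__LINE__/NULL
+// are only defined when the toolchain does not already provide them.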
+ +#ifndef MICRO_INCLUDE_PORT_DEFINE_H_ +#define MICRO_INCLUDE_PORT_DEFINE_H_ + +#define MACE_API +#define MACE_DEPRECATED + +#ifndef __FILE__ +#define __FILE__ "" +#endif + +#ifndef __LINE__ +#define __LINE__ 0 +#endif + +#ifndef NULL +#define NULL 0 +#endif + +#endif // MICRO_INCLUDE_PORT_DEFINE_H_ diff --git a/micro/include/public/micro.h b/micro/include/public/micro.h new file mode 100644 index 00000000..6618b64b --- /dev/null +++ b/micro/include/public/micro.h @@ -0,0 +1,105 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_INCLUDE_PUBLIC_MICRO_H_ +#define MICRO_INCLUDE_PUBLIC_MICRO_H_ + +#include + +#include "micro/include/port/define.h" + +namespace micro { + +enum DataFormat { + NONE = 0, NHWC = 1, NCHW = 2, + HWOI = 100, OIHW = 101, HWIO = 102, OHWI = 103, + AUTO = 1000, +}; + +enum PerfHint { + PERF_DEFAULT = 0, + PERF_LOW = 1, + PERF_NORMAL = 2, + PERF_HIGH = 3 +}; + +enum DataType { + DT_INVALID = 0, + DT_FLOAT = 1, + DT_UINT8 = 2, + DT_HALF = 3, + DT_INT32 = 4, + DT_FLOAT16 = 5, + DT_BFLOAT16 = 6, +}; + +enum MaceStatus { + MACE_SUCCESS = 0, + MACE_INVALID_ARGS = 1, + MACE_OUT_OF_RESOURCES = 2, + MACE_UNSUPPORTED = 3, + MACE_RUNTIME_ERROR = 4, +}; + +namespace model { +class NetDef; +} // namespace model + +namespace framework { +class Graph; +class Operator; +} // namespace framework + +struct MACE_API MaceMicroEngineConfig { + model::NetDef *net_def_; + const uint8_t *model_data_; + framework::Graph *graph_; + framework::Operator **op_array_; + uint8_t *tensor_mem_; + const void **input_buffers_; + const int32_t **input_shapes_; + uint8_t *scratch_buffer_; + uint32_t scratch_buffer_size_; +}; + +class MACE_API MaceMicroEngine { + public: + MaceMicroEngine() {} + ~MaceMicroEngine() {} + + MaceStatus Init(MaceMicroEngineConfig *engine_config); + + MaceStatus RegisterInputData(uint32_t idx, const void *input_buffer, + const int32_t *input_dims); + MaceStatus Run(); + + MaceStatus GetOutputData(const uint32_t idx, void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size); + MaceStatus GetOpOutputData(const uint32_t op_def_idx, + const uint32_t output_idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size); + + private: + MaceMicroEngineConfig *engine_config_; + + MaceMicroEngine(const MaceMicroEngine &); + MaceMicroEngine &operator=(const MaceMicroEngine &); +}; + +} // namespace micro + +#endif // MICRO_INCLUDE_PUBLIC_MICRO_H_ diff --git a/micro/include/utils/bfloat16.h b/micro/include/utils/bfloat16.h new file mode 100644 index 00000000..b293548d --- /dev/null +++ b/micro/include/utils/bfloat16.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_INCLUDE_UTILS_BFLOAT16_H_ +#define MICRO_INCLUDE_UTILS_BFLOAT16_H_ + +#include + +#ifdef MACE_ENABLE_BFLOAT16 + +namespace micro { + +union Sphinx { + uint32_t i; + float f; + + Sphinx(uint32_t value) : i(value) {} + + Sphinx(float value) : f(value) {} +}; + +class BFloat16 { + public: + BFloat16(); + + operator float() const { + return Sphinx(static_cast(data_ << 16)).f; + } + + void operator=(const BFloat16 &value) { + data_ = value.data_; + } + + void operator=(float value) { + data_ = Sphinx(value).i >> 16; + } + + public: + uint16_t data_; +}; + +} // namespace micro + +#endif // MACE_ENABLE_BFLOAT16 + +#endif // MICRO_INCLUDE_UTILS_BFLOAT16_H_ diff --git a/micro/include/utils/macros.h b/micro/include/utils/macros.h new file mode 100644 index 00000000..1aa03ce9 --- /dev/null +++ b/micro/include/utils/macros.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
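+
+// Small shared helpers: MACE_EMPTY_VIRTUAL_DESTRUCTOR(CLASSNAME) injects a
+// public empty virtual destructor, and MACE_UNUSED(var) silences unused
+// warnings, which matters because the library builds with -Wextra -Werror.
+// Illustrative use: void OnEvent(int id) { MACE_UNUSED(id); }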
+ +#ifndef MICRO_INCLUDE_UTILS_MACROS_H_ +#define MICRO_INCLUDE_UTILS_MACROS_H_ + +#include "micro/include/public/micro.h" + +namespace micro { + +#ifndef MACE_EMPTY_VIRTUAL_DESTRUCTOR +#define MACE_EMPTY_VIRTUAL_DESTRUCTOR(CLASSNAME) \ + public: \ + virtual ~CLASSNAME() {} +#endif // MACE_EMPTY_VIRTUAL_DESTRUCTOR + +#define MACE_UNUSED(var) (void)(var) + +} // namespace micro + +#endif // MICRO_INCLUDE_UTILS_MACROS_H_ diff --git a/micro/micro.bzl b/micro/micro.bzl new file mode 100644 index 00000000..30505961 --- /dev/null +++ b/micro/micro.bzl @@ -0,0 +1,37 @@ +def if_hexagon_enabled(a): + return select({ + "//micro:hexagon_enabled": a, + "//conditions:default": [], + }) + +def if_not_hexagon_enabled(a): + return select({ + "//micro:hexagon_enabled": [], + "//conditions:default": a, + }) + +def new_local_repository_env_impl(repository_ctx): + echo_cmd = "echo " + repository_ctx.attr.path + echo_result = repository_ctx.execute(["bash", "-c", echo_cmd]) + src_path_str = echo_result.stdout.splitlines()[0] + source_path = repository_ctx.path(src_path_str) + + work_path = repository_ctx.path(".") + child_list = source_path.readdir() + for child in child_list: + child_name = child.basename + repository_ctx.symlink(child, work_path.get_child(child_name)) + + build_file_babel = Label("//:" + repository_ctx.attr.build_file) + build_file_path = repository_ctx.path(build_file_babel) + repository_ctx.symlink(build_file_path, work_path.get_child("BUILD")) + +# a new_local_repository support environment variable +new_local_repository_env = repository_rule( + implementation = new_local_repository_env_impl, + local = True, + attrs = { + "path": attr.string(mandatory = True), + "build_file": attr.string(mandatory = True), + }, +) diff --git a/micro/model/BUILD.bazel b/micro/model/BUILD.bazel new file mode 100644 index 00000000..05052be7 --- /dev/null +++ b/micro/model/BUILD.bazel @@ -0,0 +1,20 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "model", + srcs = glob(["*.cc"]), + hdrs = glob(["*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/base", + "//micro/include", + ], +) diff --git a/micro/model/argument.cc b/micro/model/argument.cc new file mode 100644 index 00000000..64a913de --- /dev/null +++ b/micro/model/argument.cc @@ -0,0 +1,30 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
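+
+// Argument is a named attribute attached to an operator or to the net: a
+// scalar float (f), a scalar int (i), raw bytes (s), or repeated float/int
+// arrays. The MACE_DEFINE_* lines below generate the flat, offset-based
+// accessors declared in argument.h.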
+ +#include "micro/model/argument.h" + +namespace micro { +namespace model { + +MACE_DEFINE_STRING_FUNC(Argument, name, name_) +MACE_DEFINE_OBJECT_FUNC(Argument, float, f) +MACE_DEFINE_OBJECT_FUNC(Argument, int32_t, i) +MACE_DEFINE_BYTES_FUNC(Argument, s, s_) +MACE_DEFINE_ARRAY_FUNC(Argument, float, floats, floats_) +MACE_DEFINE_ARRAY_BASE_PTR_FUNC(Argument, float, floats, floats_) +MACE_DEFINE_ARRAY_FUNC(Argument, int32_t, ints, ints_) +MACE_DEFINE_ARRAY_BASE_PTR_FUNC(Argument, int32_t, ints, ints_) + +} // namespace model +} // namespace micro diff --git a/micro/model/argument.h b/micro/model/argument.h new file mode 100644 index 00000000..f400bab5 --- /dev/null +++ b/micro/model/argument.h @@ -0,0 +1,48 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_ARGUMENT_H_ +#define MICRO_MODEL_ARGUMENT_H_ + +#include "micro/base/serialize.h" + +namespace micro { +namespace model { + +class Argument : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(Argument) + + MACE_DECLARE_STRING_FUNC(name); + MACE_DECLARE_OBJECT_FUNC(float, f); + MACE_DECLARE_OBJECT_FUNC(int32_t, i); + MACE_DECLARE_BYTES_FUNC(s); + MACE_DECLARE_ARRAY_FUNC(float, floats); + MACE_DECLARE_ARRAY_BASE_PTR_FUNC(float, floats); + MACE_DECLARE_ARRAY_FUNC(int32_t, ints); + MACE_DECLARE_ARRAY_BASE_PTR_FUNC(int32_t, ints); + + private: + SerialString name_; + SerialFloat f_; + SerialInt32 i_; + SerialBytes s_; + SerialArray floats_; + SerialArray ints_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_ARGUMENT_H_ diff --git a/micro/model/const_tensor.cc b/micro/model/const_tensor.cc new file mode 100644 index 00000000..6e2262ca --- /dev/null +++ b/micro/model/const_tensor.cc @@ -0,0 +1,41 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
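+
+// ConstTensor describes one constant (weight/bias) tensor: its shape (dim),
+// data type, and quantization metadata (scale, zero_point, minval, maxval).
+// offset() is the byte offset of the tensor's data inside the packed
+// model_data_ buffer, which is how Operator::DoGetInputData() resolves
+// constant inputs.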
+ +#include "micro/model/const_tensor.h" + +namespace micro { +namespace model { + +MACE_DEFINE_ARRAY_FUNC(ConstTensor, int32_t, dim, dims_) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, DataType, data_type) +MACE_DEFINE_ARRAY_FUNC(ConstTensor, float, float_data, float_datas_) +MACE_DEFINE_ARRAY_FUNC(ConstTensor, int32_t, int32_data, int32_datas_) +MACE_DEFINE_STRING_FUNC(ConstTensor, name, name_) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, offset) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, data_size) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, scale) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, int32_t, zero_point) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, minval) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, float, maxval) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, bool, quantized) +MACE_DEFINE_OBJECT_FUNC(ConstTensor, uint32_t, node_id) + +const int32_t *ConstTensor::dim() const { + const int32_t *array = reinterpret_cast( + reinterpret_cast(this) + dims_.offset_); + return array; +} + +} // namespace model +} // namespace micro diff --git a/micro/model/const_tensor.h b/micro/model/const_tensor.h new file mode 100644 index 00000000..21778ba8 --- /dev/null +++ b/micro/model/const_tensor.h @@ -0,0 +1,63 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_CONST_TENSOR_H_ +#define MICRO_MODEL_CONST_TENSOR_H_ + +#include "micro/base/serialize.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace model { + +class ConstTensor : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(ConstTensor) + + MACE_DECLARE_ARRAY_FUNC(int32_t, dim); + MACE_DECLARE_OBJECT_FUNC(DataType, data_type); + MACE_DECLARE_ARRAY_FUNC(float, float_data); + MACE_DECLARE_ARRAY_FUNC(int32_t, int32_data); + MACE_DECLARE_STRING_FUNC(name); + MACE_DECLARE_OBJECT_FUNC(int32_t, offset); + MACE_DECLARE_OBJECT_FUNC(int32_t, data_size); + MACE_DECLARE_OBJECT_FUNC(float, scale); + MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point); + MACE_DECLARE_OBJECT_FUNC(float, minval); + MACE_DECLARE_OBJECT_FUNC(float, maxval); + MACE_DECLARE_OBJECT_FUNC(bool, quantized); + MACE_DECLARE_OBJECT_FUNC(uint32_t, node_id); + + const int32_t *dim() const; + + private: + SerialArray dims_; + DataType data_type_; + SerialArray float_datas_; + SerialArray int32_datas_; + SerialString name_; + SerialInt32 offset_; + SerialInt32 data_size_; + SerialFloat scale_; + SerialInt32 zero_point_; + SerialFloat minval_; + SerialFloat maxval_; + SerialBool quantized_; + SerialUint32 node_id_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_CONST_TENSOR_H_ diff --git a/micro/model/input_output_info.cc b/micro/model/input_output_info.cc new file mode 100644 index 00000000..2978d988 --- /dev/null +++ b/micro/model/input_output_info.cc @@ -0,0 +1,30 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/model/input_output_info.h" + +namespace micro { +namespace model { + +MACE_DEFINE_STRING_FUNC(InputOutputInfo, name, name_) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, node_id) +MACE_DEFINE_ARRAY_FUNC(InputOutputInfo, int32_t, dim, dims_) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, max_byte_size) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, data_type) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, data_format) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, float, scale) +MACE_DEFINE_OBJECT_FUNC(InputOutputInfo, int32_t, zero_point) + +} // namespace model +} // namespace micro diff --git a/micro/model/input_output_info.h b/micro/model/input_output_info.h new file mode 100644 index 00000000..9032e234 --- /dev/null +++ b/micro/model/input_output_info.h @@ -0,0 +1,50 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_INPUT_OUTPUT_INFO_H_ +#define MICRO_MODEL_INPUT_OUTPUT_INFO_H_ + +#include "micro/base/serialize.h" + +namespace micro { +namespace model { + +class InputOutputInfo : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(InputOutputInfo) + + MACE_DECLARE_STRING_FUNC(name); + MACE_DECLARE_OBJECT_FUNC(int32_t, node_id); + MACE_DECLARE_ARRAY_FUNC(int32_t, dim); + MACE_DECLARE_OBJECT_FUNC(int32_t, max_byte_size); + MACE_DECLARE_OBJECT_FUNC(int32_t, data_type); + MACE_DECLARE_OBJECT_FUNC(int32_t, data_format); + MACE_DECLARE_OBJECT_FUNC(float, scale); + MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point); + + private: + SerialString name_; + SerialInt32 node_id_; + SerialArray dims_; + SerialInt32 max_byte_size_; + SerialInt32 data_type_; + SerialInt32 data_format_; + SerialFloat scale_; + SerialInt32 zero_point_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_INPUT_OUTPUT_INFO_H_ diff --git a/micro/model/net_def.cc b/micro/model/net_def.cc new file mode 100644 index 00000000..9bc922b8 --- /dev/null +++ b/micro/model/net_def.cc @@ -0,0 +1,33 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/model/net_def.h" + +namespace micro { +namespace model { + +MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, OperatorDef, op, ops_) + +MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, Argument, arg, args_) + +MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, ConstTensor, tensor, tensors_) + +MACE_DEFINE_OBJECT_FUNC(NetDef, int32_t, data_type) + +MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, InputOutputInfo, input_info, input_infos_) + +MACE_DEFINE_PTR_ARRAY_FUNC(NetDef, InputOutputInfo, output_info, output_infos_) + +} // namespace model +} // namespace micro diff --git a/micro/model/net_def.h b/micro/model/net_def.h new file mode 100644 index 00000000..e6e95ee5 --- /dev/null +++ b/micro/model/net_def.h @@ -0,0 +1,50 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_NET_DEF_H_ +#define MICRO_MODEL_NET_DEF_H_ + +#include "micro/base/serialize.h" +#include "micro/model/argument.h" +#include "micro/model/const_tensor.h" +#include "micro/model/input_output_info.h" +#include "micro/model/operator_def.h" + +namespace micro { +namespace model { + +class NetDef : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(NetDef) + + MACE_DECLARE_PTR_ARRAY_FUNC(OperatorDef, op); + MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg); + MACE_DECLARE_PTR_ARRAY_FUNC(ConstTensor, tensor); + MACE_DECLARE_OBJECT_FUNC(int32_t, data_type); + MACE_DECLARE_PTR_ARRAY_FUNC(InputOutputInfo, input_info); + MACE_DECLARE_PTR_ARRAY_FUNC(InputOutputInfo, output_info); + + private: + SerialArray ops_; + SerialArray args_; + SerialArray tensors_; + SerialInt32 data_type_; + SerialArray input_infos_; + SerialArray output_infos_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_NET_DEF_H_ diff --git a/micro/model/operator_def.cc b/micro/model/operator_def.cc new file mode 100644 index 00000000..31ffa678 --- /dev/null +++ b/micro/model/operator_def.cc @@ -0,0 +1,33 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
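+
+// OperatorDef is the serialized per-operator definition: input/output tensor
+// names, op type, arguments, and the static output shapes and types.
+// mem_offset(i) (the mem_id of the proto) is the byte offset of output i
+// inside the shared tensor_mem_ arena, which is how the framework addresses
+// activations without dynamic allocation.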
+ +#include "micro/model/operator_def.h" + +namespace micro { +namespace model { + +MACE_DEFINE_STRING_ARRAY_FUNC(OperatorDef, input, inputs_) +MACE_DEFINE_STRING_ARRAY_FUNC(OperatorDef, output, outputs_) +MACE_DEFINE_STRING_FUNC(OperatorDef, name, name_) +MACE_DEFINE_STRING_FUNC(OperatorDef, type, type_) +MACE_DEFINE_OBJECT_FUNC(OperatorDef, int32_t, device_type) +MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, Argument, arg, args_) +MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, OutputShape, + output_shape, output_shapes_) +MACE_DEFINE_ARRAY_FUNC(OperatorDef, DataType, output_type, output_types_) +// the mem_offset is the mem_id in proto file +MACE_DEFINE_ARRAY_FUNC(OperatorDef, int32_t, mem_offset, mem_offsets_) + +} // namespace model +} // namespace micro diff --git a/micro/model/operator_def.h b/micro/model/operator_def.h new file mode 100644 index 00000000..92695ad9 --- /dev/null +++ b/micro/model/operator_def.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_OPERATOR_DEF_H_ +#define MICRO_MODEL_OPERATOR_DEF_H_ + +#include "micro/base/serialize.h" +#include "micro/include/public/micro.h" +#include "micro/model/argument.h" +#include "micro/model/output_shape.h" + +namespace micro { +namespace model { + +class OperatorDef : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(OperatorDef) + + MACE_DECLARE_STRING_ARRAY_FUNC(input); + MACE_DECLARE_STRING_ARRAY_FUNC(output); + MACE_DECLARE_STRING_FUNC(name); + MACE_DECLARE_STRING_FUNC(type); + MACE_DECLARE_OBJECT_FUNC(int32_t, device_type); + MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg); + MACE_DECLARE_PTR_ARRAY_FUNC(OutputShape, output_shape); + MACE_DECLARE_ARRAY_FUNC(DataType, output_type); + // the mem_offset is the mem_id in proto file + MACE_DECLARE_ARRAY_FUNC(int32_t, mem_offset); + + private: + SerialArray inputs_; + SerialArray outputs_; + SerialString name_; + SerialString type_; + // device_type_ is not used currently, for future; + SerialInt32 device_type_; + SerialArray args_; + SerialArray output_shapes_; + SerialArray output_types_; + SerialArray mem_offsets_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_OPERATOR_DEF_H_ diff --git a/micro/model/output_shape.cc b/micro/model/output_shape.cc new file mode 100644 index 00000000..b53c07f4 --- /dev/null +++ b/micro/model/output_shape.cc @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/model/output_shape.h" + +namespace micro { +namespace model { + +MACE_DEFINE_ARRAY_FUNC(OutputShape, int32_t, dim, dims_) + +const int32_t *OutputShape::dim() const { + const int32_t *array = reinterpret_cast( + reinterpret_cast(this) + dims_.offset_); + return array; +} + +int32_t *OutputShape::mutable_dim() { + char *base_addr = reinterpret_cast(const_cast(this)); + int32_t *array = reinterpret_cast(base_addr + dims_.offset_); + return array; +} + +} // namespace model +} // namespace micro diff --git a/micro/model/output_shape.h b/micro/model/output_shape.h new file mode 100644 index 00000000..9e8ecade --- /dev/null +++ b/micro/model/output_shape.h @@ -0,0 +1,40 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_MODEL_OUTPUT_SHAPE_H_ +#define MICRO_MODEL_OUTPUT_SHAPE_H_ + +#include "micro/base/serialize.h" + +namespace micro { +namespace model { + +class OutputShape : public Serialize { + public: + MACE_DEFINE_HARD_CODE_MAGIC(OutputShape) + + MACE_DECLARE_ARRAY_FUNC(int32_t, dim); + + const int32_t *dim() const; + + int32_t *mutable_dim(); + + private: + SerialArray dims_; +}; + +} // namespace model +} // namespace micro + +#endif // MICRO_MODEL_OUTPUT_SHAPE_H_ diff --git a/micro/ops/BUILD.bazel b/micro/ops/BUILD.bazel new file mode 100644 index 00000000..f514071f --- /dev/null +++ b/micro/ops/BUILD.bazel @@ -0,0 +1,36 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "ops", + srcs = glob(["**/*.cc"]), + hdrs = glob(["**/*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/base", + "//micro/framework", + ], +) + +cc_library( + name = "ops_for_test", + srcs = glob(["**/*.cc"]), + hdrs = glob(["**/*.h"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "//micro/base", + "//micro/framework:framework_for_optest", + ], + alwayslink = 1, +) diff --git a/micro/ops/activation.cc b/micro/ops/activation.cc new file mode 100644 index 00000000..2d5f9cff --- /dev/null +++ b/micro/ops/activation.cc @@ -0,0 +1,69 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
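OutputShape::dim() and mutable_dim() above resolve the dims_ array at a byte offset relative to the object's own address, so they are only meaningful when the OutputShape lives inside the serialized model buffer. Because the angle-bracket template arguments of the casts appear to have been stripped from the patch text, the same two functions are restated below with plausible cast types filled in; treat the exact spellings as an assumption.

// Restatement with assumed cast types; the pointer arithmetic matches the
// definitions above: dims_.offset_ is a byte offset from `this`.
const int32_t *OutputShape::dim() const {
  return reinterpret_cast<const int32_t *>(
      reinterpret_cast<const char *>(this) + dims_.offset_);
}

int32_t *OutputShape::mutable_dim() {
  // The const_cast mirrors the original source; its target type is assumed.
  char *base_addr = reinterpret_cast<char *>(const_cast<OutputShape *>(this));
  return reinterpret_cast<int32_t *>(base_addr + dims_.offset_);
}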
+ +#include "micro/ops/activation.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/model/argument.h" + +namespace micro { +namespace ops { + +namespace { +template +void PReLUActivation(const T *input_ptr, const int32_t outer_size, + const int32_t channel, const T *alpha_ptr, + T *output_ptr) { + for (int32_t i = 0; i < outer_size; ++i) { + const int32_t outer_base = i * channel; + for (int32_t c = 0; c < channel; ++c) { + const int32_t idx = outer_base + c; + if (input_ptr[idx] < 0) { + output_ptr[idx] = input_ptr[idx] * alpha_ptr[c]; + } else { + output_ptr[idx] = input_ptr[idx]; + } + } + } +} +} // namespace + +MaceStatus ActivationOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + output_ = GetOutputData(OUTPUT); + + return activation_.Init(this); +} + +MaceStatus ActivationOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + if (activation_.GetActivationType() == PRELU) { + MACE_ASSERT(GetInputSize() > 1); + const mifloat *alpha = GetInputData(ALPHA); + const int32_t outer_size = + base::accumulate_multi(input_dims_, 0, input_dim_size_ - 1); + const int32_t channel = input_dims_[input_dim_size_ - 1]; + PReLUActivation(input_, outer_size, channel, alpha, output_); + return MACE_SUCCESS; + } else { + const int32_t input_size = base::GetShapeSize(input_dim_size_, input_dims_); + return activation_.Compute(input_, input_size, output_); + } +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/activation.h b/micro/ops/activation.h new file mode 100644 index 00000000..c57d6e99 --- /dev/null +++ b/micro/ops/activation.h @@ -0,0 +1,44 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_ACTIVATION_H_ +#define MICRO_OPS_ACTIVATION_H_ + +#include "micro/framework/operator.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { +class ActivationOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + Activation activation_; + + MACE_OP_INPUT_TAGS(INPUT, ALPHA); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_ACTIVATION_H_ diff --git a/micro/ops/argmax.h b/micro/ops/argmax.h new file mode 100644 index 00000000..363cf1a0 --- /dev/null +++ b/micro/ops/argmax.h @@ -0,0 +1,129 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_ARGMAX_H_ +#define MICRO_OPS_ARGMAX_H_ + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/include/utils/macros.h" + +namespace micro { +namespace ops { + +template +class ArgMaxOp : public framework::Operator { + public: + MaceStatus OnInit() { + axis_ = GetArgByName("axis", static_cast(0)); + keep_dims_ = GetArgByName("keepdims", true); + MACE_ASSERT1(keep_dims_, "Mace only supports keep_dims ArgMax."); + argmin_ = GetArgByName("argmin", false); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT1(input_dim_size_ > 0, "ArgMax input should not be a scalar"); + + output_ = GetOutputData(OUTPUT); + output_dims_ = GetOutputShapeDims(OUTPUT); + output_dim_size_ = GetOutputShapeDimSize(OUTPUT); + return MACE_SUCCESS; + } + + MaceStatus Run() { + int32_t axis_value = 0; + const int32_t *axis = GetInputSize() == 2 ? + GetInputData(AXIS) : NULL; + if (axis != NULL) { + MACE_ASSERT1(GetInputShapeDimSize(AXIS) == 0, + "Mace argmax only supports scalar axis"); + axis_value = axis[0]; + } else { + axis_value = axis_; + } + if (axis_value < 0) { + axis_value += input_dim_size_; + } + MACE_ASSERT1(axis_value == static_cast(input_dim_size_) - 1, + "Mace argmax only supports last dimension as axis"); + + MACE_ASSERT1(output_dim_size_ >= input_dim_size_ - 1, + "Convert model error."); + int32_t *output_dims = + ScratchBuffer(engine_config_).GetBuffer(output_dim_size_); + for (int32_t d = 0; d < static_cast(output_dim_size_); ++d) { + output_dims[d] = input_dims_[d < axis_value ? 
d : d + 1]; + } + ResizeOutputShape(OUTPUT, output_dim_size_, output_dims); + + int32_t outer_size = base::GetShapeSize(output_dim_size_, output_dims_); + int32_t inner_size = input_dims_[axis_value]; + + if (argmin_) { + for (int32_t i = 0; i < outer_size; ++i) { + int32_t idx = 0; + T min_value = base::highest(); + const T *input_ptr = input_ + i * inner_size; + for (int32_t j = 0; j < inner_size; ++j) { + float input = input_ptr[j]; + if (input < min_value) { + min_value = input; + idx = j; + } + } + output_[i] = idx; + } + } else { + for (int32_t i = 0; i < outer_size; ++i) { + int32_t idx = 0; + T max_value = base::lowest(); + const T *input_ptr = input_ + i * inner_size; + for (int32_t j = 0; j < inner_size; ++j) { + float input = input_ptr[j]; + if (input > max_value) { + max_value = input; + idx = j; + } + } + output_[i] = idx; + } + } + + return MaceStatus::MACE_SUCCESS; + } + + private: + int32_t axis_; + bool keep_dims_; + bool argmin_; + + const T *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + int32_t *output_; + const int32_t *output_dims_; + uint32_t output_dim_size_; + + MACE_OP_INPUT_TAGS(INPUT, AXIS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_ARGMAX_H_ diff --git a/micro/ops/bias_add.cc b/micro/ops/bias_add.cc new file mode 100644 index 00000000..5a1035cd --- /dev/null +++ b/micro/ops/bias_add.cc @@ -0,0 +1,51 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/bias_add.h" + +#include "micro/base/logging.h" +#include "micro/ops/utils/crumb_utils.h" + +namespace micro { +namespace ops { + +MaceStatus BiasAddOp::OnInit() { + MACE_ASSERT1(static_cast( + GetArgByName("data_format", static_cast(NHWC))) + != NCHW, "Now only support NHWC"); + + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + bias_ = GetInputData(BIAS); + bias_dims_ = GetInputShapeDims(BIAS); + bias_dim_size_ = GetInputShapeDimSize(BIAS); + + output_ = GetOutputData(OUTPUT); + + MACE_ASSERT1(bias_dim_size_ == 1, "Bias dim must be 1."); + MACE_ASSERT1(bias_dims_[0] == input_dims_[input_dim_size_ - 1], + "The bias's channel dim should be equal to the input's"); + + return ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_); +} + +MaceStatus BiasAddOp::Run() { + return crumb::ComputeBias(input_, input_dims_, input_dim_size_, + bias_, bias_dims_[0], output_); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/bias_add.h b/micro/ops/bias_add.h new file mode 100644 index 00000000..4a1d1880 --- /dev/null +++ b/micro/ops/bias_add.h @@ -0,0 +1,45 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_BIAS_ADD_H_ +#define MICRO_OPS_BIAS_ADD_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class BiasAddOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + const mifloat *bias_; + const int32_t *bias_dims_; + uint32_t bias_dim_size_; + + mifloat *output_; + + MACE_OP_INPUT_TAGS(INPUT, BIAS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_BIAS_ADD_H_ diff --git a/micro/ops/cast.h b/micro/ops/cast.h new file mode 100644 index 00000000..ca1fd779 --- /dev/null +++ b/micro/ops/cast.h @@ -0,0 +1,87 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
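BiasAddOp above checks that the bias is one-dimensional and matches the input's channel (last) dimension, then hands the arithmetic to crumb::ComputeBias, whose body is outside this hunk. As a point of reference only, a naive NHWC bias add over the last dimension would look like the sketch below; this is an assumption about the helper's effect, not its implementation, and the NaiveBiasAdd name and float types are purely illustrative (the op itself works on mifloat).

// Assumed effect of a channel-wise bias add in NHWC layout (illustrative).
#include <stdint.h>

void NaiveBiasAdd(const float *input, const int32_t *dims, uint32_t dim_size,
                  const float *bias, int32_t channels, float *output) {
  int32_t outer = 1;
  for (uint32_t i = 0; i + 1 < dim_size; ++i) {
    outer *= dims[i];  // product of all dimensions except the channel dim
  }
  for (int32_t o = 0; o < outer; ++o) {
    for (int32_t c = 0; c < channels; ++c) {
      output[o * channels + c] = input[o * channels + c] + bias[c];
    }
  }
}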
+ +#ifndef MICRO_OPS_CAST_H_ +#define MICRO_OPS_CAST_H_ + +#include "micro/base/utils.h" +#include "micro/base/types.h" +#include "micro/framework/operator.h" +#include "micro/include/utils/bfloat16.h" + +namespace micro { +namespace ops { + +#ifndef MACE_CAST_OP_CAST_TENSOR +#define MACE_CAST_OP_CAST_TENSOR(SrcType, DstType) \ +const SrcType *input = static_cast(input_); \ +DstType *output = static_cast(output_); \ +for (int32_t i = 0; i < tensor_size_; ++i) { \ + output[i] = input[i]; \ +} +#endif // MACE_CAST_OP_CAST_TENSOR + +class CastOp : public framework::Operator { + public: + MaceStatus OnInit() { + input_ = GetInputData(INPUT); + input_dt_ = static_cast( + GetArgByName("T", static_cast(DT_FLOAT))); + const int32_t *input_dims = GetInputShapeDims(INPUT); + const uint32_t input_dim_size_ = GetInputShapeDimSize(INPUT); + tensor_size_ = base::GetShapeSize(input_dim_size_, input_dims); + MACE_ASSERT(tensor_size_ > 0); + output_ = GetOutputData(OUTPUT); + output_dt_ = GetOutputDataType(OUTPUT); + + return MACE_SUCCESS; + } + + MaceStatus Run() { + if (input_dt_ == DT_FLOAT && output_dt_ == DT_BFLOAT16) { +#ifdef MACE_ENABLE_BFLOAT16 + MACE_CAST_OP_CAST_TENSOR(float, BFloat16) +#else + MACE_NOT_IMPLEMENTED; +#endif + } else if (input_dt_ == DT_BFLOAT16 && output_dt_ == DT_FLOAT) { +#ifdef MACE_ENABLE_BFLOAT16 + MACE_CAST_OP_CAST_TENSOR(BFloat16, float) +#else + MACE_NOT_IMPLEMENTED; +#endif + } else { + MACE_NOT_IMPLEMENTED; + } + + return MACE_SUCCESS; + } + + private: + const void *input_; + DataType input_dt_; + int32_t tensor_size_; + + void *output_; + DataType output_dt_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_CAST_H_ diff --git a/micro/ops/eltwise.cc b/micro/ops/eltwise.cc new file mode 100644 index 00000000..98f3897e --- /dev/null +++ b/micro/ops/eltwise.cc @@ -0,0 +1,54 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
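CastOp above performs its element-wise conversion through the MACE_CAST_OP_CAST_TENSOR macro, whose pointer casts have lost their angle-bracket template arguments in this patch text. Expanded by hand for the DT_FLOAT to DT_BFLOAT16 branch, the intended loop is presumably close to the following; the exact cast spellings are an assumption.

// Assumed expansion of MACE_CAST_OP_CAST_TENSOR(float, BFloat16):
const float *input = static_cast<const float *>(input_);
BFloat16 *output = static_cast<BFloat16 *>(output_);
for (int32_t i = 0; i < tensor_size_; ++i) {
  output[i] = input[i];  // relies on BFloat16's converting assignment
}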
+ +#include "micro/ops/eltwise.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { +namespace eltwise { +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size) { + while (--dim_size > 0) { + if (dims0[dim_size] != dims1[dim_size]) + return false; + } + return true; +} + +int32_t GetIndex(const int32_t *shape, + const int32_t *index, int32_t dim_size) { + int32_t idx = 0; + for (int32_t i = 0; i < dim_size; ++i) { + if (shape[i] > 1) { + idx = idx * shape[i] + index[i]; + } + } + return idx; +} + +void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size) { + for (int32_t i = dim_size - 1; i >= 0; --i) { + ++(*index)[i]; + if ((*index)[i] >= shape[i]) { + (*index)[i] -= shape[i]; + } else { + break; + } + } +} +} // namespace eltwise +} // namespace ops +} // namespace micro diff --git a/micro/ops/eltwise.h b/micro/ops/eltwise.h new file mode 100644 index 00000000..263082cc --- /dev/null +++ b/micro/ops/eltwise.h @@ -0,0 +1,1089 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_ELTWISE_H_ +#define MICRO_OPS_ELTWISE_H_ + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +namespace eltwise { // for redefine +enum Type { + SUM = 0, + SUB = 1, + PROD = 2, + DIV = 3, + MIN = 4, + MAX = 5, + NEG = 6, + ABS = 7, + SQR_DIFF = 8, + POW = 9, + EQUAL = 10, + FLOOR_DIV = 11, + CLIP = 12, + SIGN = 13, + NONE = 14, +}; + +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size); +int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size); +void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size); +template +int32_t Sign(T val) { + return (T(0) < val) - (val < T(0)); +} + +} // namespace eltwise + +template +class EltwiseOp : public framework::Operator { + public: + MaceStatus OnInit() { + input0_ = GetInputData(INPUT0); + input0_dims_ = GetInputShapeDims(INPUT0); + input0_dim_size_ = GetInputShapeDimSize(INPUT0); + + if (GetInputSize() >= 2) { + input1_ = GetInputData(INPUT1); + input1_dims_ = GetInputShapeDims(INPUT1); + input1_dim_size_ = GetInputShapeDimSize(INPUT1); + } else { + input1_ = NULL; + input1_dims_ = NULL; + input1_dim_size_ = 0; + } + + output_ = GetOutputData(OUTPUT); + + type_ = static_cast(GetArgByName( + "type", static_cast(NONE))); + coeff_ = GetRepeatArgByName("coeff", &coeff_size_); + scalar_input_ = GetArgByName("scalar_input", 1.0f); + scalar_input_index_ = GetArgByName("scalar_input_index", + static_cast(1)); + DataFormat data_format = static_cast( + GetArgByName("data_format", static_cast(NHWC))); + nchw_ = (data_format == NCHW); + + return MACE_SUCCESS; + } + + MaceStatus Run() { + MACE_ASSERT1(GetInputSize() < 3, + "Element-Wise does not support 3 or higher inputs," + " you could change your model to multiple 
Element-Wise"); + + if (input1_ == NULL) { + input1_ = &scalar_input_; + input1_dim_size_ = 1; + input1_dims_ = static_cast( + reinterpret_cast(&input1_dim_size_)); // a trick + } + + if (type_ == eltwise::CLIP) { + MACE_ASSERT1(coeff_size_ == 2 && coeff_[0] < coeff_[1], + "Clip's min/max values are not correct."); + } + + if (type_ == eltwise::EQUAL) { // IsLogicalType + // as we do not have bool-type tensor, we use int type + return DoEltwise(); + } else { + return DoEltwise(); + } + } + + private: + template + MaceStatus DoEltwise() { + int32_t input0_size = base::GetShapeSize(input0_dim_size_, input0_dims_); + int32_t input1_size = input1_dim_size_ == 0 ? + 0 : base::GetShapeSize(input1_dim_size_, + input1_dims_); + bool swapped = false; + if (input0_dim_size_ < input1_dim_size_ + || (input0_dim_size_ == input1_dim_size_ + && input0_size < input1_size)) { + base::swap(&input0_, &input1_); + base::swap(&input0_dims_, &input1_dims_); + base::swap(&input0_dim_size_, &input1_dim_size_); + base::swap(&input0_size, &input1_size); + swapped = true; + } + if (scalar_input_index_ == 0) { + swapped = !swapped; + } + + // check if we can broadcast tensor + uint32_t rank_diff = + static_cast(input0_dim_size_ - input1_dim_size_); + if (nchw_) { + MACE_ASSERT1((input0_dim_size_ == 4) && + ((input1_dim_size_ == 0) || + (input1_dim_size_ == 4 && input1_dims_[1] == input0_dims_[1] && + (input1_dims_[0] == input0_dims_[0] || + input1_dims_[0] == 1)) || + (input1_dim_size_ == 1 && input1_dims_[0] == input0_dims_[1])), + "only support broadcast channel dimension"); + } else { + for (uint32_t i = 0; i < input1_dim_size_; ++i) { + MACE_ASSERT1(input0_dims_[rank_diff + i] == 1 || input1_dims_[i] == 1 || + input0_dims_[rank_diff + i] == input1_dims_[i], + "Element-Wise op only support tail dimensions broadcast"); + } + } + + if (nchw_ && input1_dim_size_ > 0) { + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_)); + DstType *output_ptr = reinterpret_cast(output_); + if (input1_size < input0_size) { + TensorEltwisePerChannel(type_, + input0_, + input1_, + input0_dims_[0], + input1_dim_size_ == 1 ? 
1 : input1_dims_[0], + input0_dims_[1], + input0_dims_[2] * input0_dims_[3], + swapped, + output_ptr); + } else { + TensorEltwise(type_, input0_, input1_, input0_size, + swapped, output_ptr); + } + } else { + ScratchBuffer scratch_buffer(engine_config_); + int32_t *input1_shape = + scratch_buffer.GetBuffer(input0_dim_size_); + if (rank_diff > 0) { + base::memset(input1_shape, static_cast(1), rank_diff); + } + if (input1_dim_size_ > 0) { + base::memcpy(input1_shape + rank_diff, input1_dims_, + input1_dim_size_ * sizeof(int32_t)); + } + + int32_t *output_shape = + scratch_buffer.GetBuffer(input0_dim_size_); + for (uint32_t i = 0; i < input0_dim_size_; ++i) { + output_shape[i] = base::max(input0_dims_[i], input1_shape[i]); + } + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input0_dim_size_, output_shape)); + + DstType *output_ptr = reinterpret_cast(output_); + bool need_general_broadcast = false; + for (uint32_t i = 0; i < input1_dim_size_; ++i) { + if ((input0_dims_[rank_diff + i] == 1 && input1_dims_[i] > 1) || + (input0_dims_[rank_diff + i] > 1 && input1_dims_[i] == 1)) { + need_general_broadcast = true; + break; + } + } + + if (input1_size == 1) { + TensorScalarEltwise(type_, input0_, input1_[0], + input0_size, swapped, output_ptr); + } else if (eltwise::ShapeIsEqual(input0_dims_, + input1_shape, + input0_dim_size_)) { + TensorEltwise(type_, input0_, input1_, input0_size, + swapped, output_ptr); + } else if (need_general_broadcast) { + int32_t *out_index = + scratch_buffer.GetBuffer(input0_dim_size_); + TensorGeneralBroadcastEltwise(type_, input0_, input1_, input0_dim_size_, + swapped, input0_dims_, input1_shape, + output_shape, out_index, output_ptr); + } else { + int32_t common_size = input1_size; + int32_t diff_size = input0_size / common_size; + TensorBroadcastEltwise(type_, input0_, input1_, + diff_size, common_size, swapped, output_ptr); + } + } + + return MACE_SUCCESS; + } + + template + inline void TensorGeneralBroadcastEltwise( + const eltwise::Type type, + const T *input0, + const T *input1, + const uint32_t dim_size, + const bool swapped, + const int32_t *input0_shape, + const int32_t *input1_shape, + const int32_t *output_shape, + int32_t *out_index, + DstType *output) { + const int32_t output_size = base::GetShapeSize(dim_size, output_shape); + base::memset(out_index, static_cast(0), dim_size); + switch (type) { + case eltwise::SUM: + if (coeff_size_ == 0) { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input0[idx0] + input1[idx1]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } else { + float coeff_copy[2] = {coeff_[0], coeff_[1]}; + if (swapped) { + base::swap(coeff_copy, coeff_copy + 1); + } + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = + input0[idx0] * coeff_copy[0] + input1[idx1] * coeff_copy[1]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } + break; + case eltwise::SUB: + if (!swapped) { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input0[idx0] - input1[idx1]; + eltwise::IncreaseIndex(output_shape, &out_index, 
dim_size); + } + } else { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input1[idx1] - input0[idx0]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } + break; + case eltwise::PROD: + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input0[idx0] * input1[idx1]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + break; + case eltwise::DIV: + if (!swapped) { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input0[idx0] / input1[idx1]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } else { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input1[idx1] / input0[idx0]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } + break; + case eltwise::FLOOR_DIV: + if (!swapped) { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::floor(input0[idx0] / input1[idx1]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } else { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::floor(input1[idx1] / input0[idx0]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } + break; + case eltwise::MIN: + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::min(input1[idx1], input0[idx0]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + break; + case eltwise::MAX: + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::max(input1[idx1], input0[idx0]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + break; + case eltwise::SQR_DIFF: + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::pow(input1[idx1] - input0[idx0], 2.f); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + break; + case eltwise::POW: + if (!swapped) { + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::pow(input0[idx0], input1[idx1]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } else { + for 
(int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = base::pow(input1[idx1], input0[idx0]); + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + } + break; + case eltwise::EQUAL: + for (int32_t i = 0; i < output_size; ++i) { + const int32_t idx0 = + eltwise::GetIndex(input0_shape, out_index, dim_size); + const int32_t idx1 = + eltwise::GetIndex(input1_shape, out_index, dim_size); + output[i] = input1[idx1] == input0[idx0]; + eltwise::IncreaseIndex(output_shape, &out_index, dim_size); + } + break; + default:LOG(FATAL) << "Eltwise op not support type " + << static_cast(type); + } + } + + template + inline void TensorBroadcastEltwise(const eltwise::Type type, + const T *input0, + const T *input1, + const int32_t diff_size, + const int32_t common_size, + const bool swapped, + DstType *output) { + switch (type) { + case eltwise::SUM: + if (coeff_size_ == 0) { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] + input1[i]; + } + } + } else { + float coeff_copy[2] = {coeff_[0], coeff_[1]}; + if (swapped) { + base::swap(coeff_copy, coeff_copy + 1); + } + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] * coeff_copy[0] + + input1[i] * coeff_copy[1]; + } + } + } + break; + case eltwise::SUB: + if (!swapped) { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] - input1[i]; + } + } + } else { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input1[i] - input0[i + d * common_size]; + } + } + } + break; + case eltwise::PROD: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] * input1[i]; + } + } + break; + case eltwise::DIV: + if (!swapped) { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] / input1[i]; + } + } + } else { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input1[i] / input0[i + d * common_size]; + } + } + } + break; + case eltwise::FLOOR_DIV: + if (!swapped) { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::floor(input0[i + d * common_size] / input1[i]); + } + } + } else { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::floor(input1[i] / input0[i + d * common_size]); + } + } + } + break; + case eltwise::MIN: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::min(input0[i + d * common_size], input1[i]); + } + } + break; + case eltwise::MAX: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::max(input0[i + d * common_size], input1[i]); + } + } + break; + case eltwise::SQR_DIFF: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; 
++i) { + output[i + d * common_size] = + base::pow(input0[i + d * common_size] - input1[i], 2.f); + } + } + break; + case eltwise::POW: + if (!swapped) { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::pow(input0[i + d * common_size], input1[i]); + } + } + } else { + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::pow(input1[i], input0[i + d * common_size]); + } + } + } + break; + case eltwise::NEG: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = -input0[i + d * common_size]; + } + } + break; + case eltwise::ABS: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::fabs(input0[i + d * common_size]); + } + } + break; + case eltwise::EQUAL: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + input0[i + d * common_size] == input1[i]; + } + } + break; + case eltwise::CLIP: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + base::max(coeff_[0], + base::min(coeff_[1], + input0[i + d * common_size])); + } + } + break; + case eltwise::SIGN: + for (int32_t d = 0; d < diff_size; ++d) { + for (int32_t i = 0; i < common_size; ++i) { + output[i + d * common_size] = + eltwise::Sign(input0[i + d * common_size]); + } + } + break; + default:LOG(FATAL) << "Eltwise op not support type " + << static_cast(type); + } + } + +// Multiplication is costly, so we specialize the following case. + template + inline void TensorEltwise(const eltwise::Type type, + const T *input0, + const T *input1, + const int32_t size, + const bool swapped, + DstType *output) { + switch (type) { + case eltwise::SUM: + if (coeff_size_ == 0) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] + input1[i]; + } + } else { + float coeff_copy[2] = {coeff_[0], coeff_[1]}; + if (swapped) { + base::swap(coeff_copy, coeff_copy + 1); + } + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] * coeff_copy[0] + input1[i] * coeff_copy[1]; + } + } + break; + case eltwise::SUB: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] - input1[i]; + } + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = input1[i] - input0[i]; + } + } + break; + case eltwise::PROD: + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] * input1[i]; + } + break; + case eltwise::DIV: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] / input1[i]; + } + + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = input1[i] / input0[i]; + } + } + break; + case eltwise::FLOOR_DIV: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::floor(input0[i] / input1[i]); + } + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::floor(input1[i] / input0[i]); + } + } + break; + case eltwise::MIN: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::min(input0[i], input1[i]); + } + break; + case eltwise::MAX: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::max(input0[i], input1[i]); + } + break; + case eltwise::SQR_DIFF: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::pow(input0[i] - input1[i], 2.f); + } + break; + case eltwise::POW: + if (!swapped) { + for (int32_t i = 0; i 
< size; ++i) { + output[i] = base::pow(input0[i], input1[i]); + } + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::pow(input1[i], input0[i]); + } + } + break; + case eltwise::NEG: + for (int32_t i = 0; i < size; ++i) { + output[i] = -input0[i]; + } + break; + case eltwise::ABS: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::fabs(input0[i]); + } + break; + case eltwise::EQUAL: + for (int32_t i = 0; i < size; ++i) { + output[i] = (input0[i] == input1[i]); + } + break; + case eltwise::CLIP: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::max( + coeff_[0], base::min(coeff_[1], input0[i])); + } + break; + case eltwise::SIGN: + for (int32_t i = 0; i < size; ++i) { + output[i] = eltwise::Sign(input0[i]); + } + break; + default:LOG(FATAL) << "Eltwise op not support type " + << static_cast(type); + } + } + +// Multiplication is costly, so we specialize the following case. + template + inline void TensorScalarEltwise(const eltwise::Type type, + const T *input0, + const T input1, + const int32_t size, + const bool swapped, + DstType *output) { + switch (type) { + case eltwise::SUM: + if (coeff_size_ == 0) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] + input1; + } + + } else { + float coeff_copy[2] = {coeff_[0], coeff_[1]}; + if (swapped) { + base::swap(coeff_copy, coeff_copy + 1); + } + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] * coeff_copy[0] + input1 * coeff_copy[1]; + } + } + break; + case eltwise::SUB: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] - input1; + } + + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = input1 - input0[i]; + } + } + break; + case eltwise::PROD: + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] * input1; + } + break; + case eltwise::DIV: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = input0[i] / input1; + } + + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = input1 / input0[i]; + } + } + break; + case eltwise::FLOOR_DIV: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::floor(input0[i] / input1); + } + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::floor(input1 / input0[i]); + } + } + break; + case eltwise::MIN: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::min(input0[i], input1); + } + + break; + case eltwise::MAX: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::max(input0[i], input1); + } + + break; + case eltwise::SQR_DIFF: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::pow(input0[i] - input1, 2.f); + } + + break; + case eltwise::POW: + if (!swapped) { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::pow(input0[i], input1); + } + } else { + for (int32_t i = 0; i < size; ++i) { + output[i] = base::pow(input1, input0[i]); + } + } + break; + case eltwise::NEG: + for (int32_t i = 0; i < size; ++i) { + output[i] = -input0[i]; + } + break; + case eltwise::ABS: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::fabs(input0[i]); + } + break; + case eltwise::EQUAL: + for (int32_t i = 0; i < size; ++i) { + output[i] = (input0[i] == input1); + } + break; + case eltwise::CLIP: + for (int32_t i = 0; i < size; ++i) { + output[i] = base::max(coeff_[0], + base::min(coeff_[1], input0[i])); + } + break; + case eltwise::SIGN: + for (int32_t i = 0; i < size; ++i) { + output[i] = eltwise::Sign(input0[i]); + } + break; + default:LOG(FATAL) << "Eltwise op not support type " + << static_cast(type); + } + } + + 
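+  // TensorEltwisePerChannel covers the NCHW broadcast path chosen in
+  // DoEltwise() when input1 is smaller than input0: input0 is addressed as
+  // [batch0, channel, image_size], input1 as [batch1, channel] (batch1 == 1
+  // when the per-channel operand carries no batch dimension), and the single
+  // value in1_ptr[c] is applied across the whole image_size plane of that
+  // channel.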
template + inline void TensorEltwisePerChannel(const eltwise::Type type, + const T *input0, + const T *input1, + const int32_t batch0, + const int32_t batch1, + const int32_t channel, + const int32_t image_size, + const bool swapped, + DstType *output) { + switch (type) { + case eltwise::SUM: + if (coeff_size_ == 0) { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in0_ptr[i] + in1_ptr[c]; + } + } + } + } else { + float coeff_copy[2] = {coeff_[0], coeff_[1]}; + if (swapped) { + base::swap(coeff_copy, coeff_copy + 1); // NOLINT + } + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = + in0_ptr[i] * coeff_copy[0] + in1_ptr[c] * coeff_copy[1]; + } + } + } + } + break; + case eltwise::SUB: + if (!swapped) { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in0_ptr[i] - in1_ptr[c]; + } + } + } + } else { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in1_ptr[c] - in0_ptr[i]; + } + } + } + } + break; + case eltwise::PROD: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in0_ptr[i] * in1_ptr[c]; + } + } + } + break; + case eltwise::DIV: + if (!swapped) { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in0_ptr[i] / in1_ptr[c]; + } + } + } + } else { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in1_ptr[c] / in0_ptr[i]; + } + } + } + } + break; + case eltwise::FLOOR_DIV: + if (!swapped) { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? 
b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::floor(in0_ptr[i] / in1_ptr[c]); + } + } + } + } else { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::floor(in1_ptr[c] / in0_ptr[i]); + } + } + } + } + break; + case eltwise::MIN: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::min(in0_ptr[i], in1_ptr[c]); + } + } + } + break; + case eltwise::MAX: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::max(in0_ptr[i], in1_ptr[c]); // NOLINT + } + } + } + break; + case eltwise::SQR_DIFF: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::pow(in0_ptr[i] - in1_ptr[c], 2.f); + } + } + } + break; + case eltwise::POW: + if (!swapped) { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::pow(in0_ptr[i], in1_ptr[c]); + } + } + } + } else { + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = base::pow(in1_ptr[c], in0_ptr[i]); + } + } + } + } + break; + case eltwise::NEG: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = -input0[i]; + } + } + } + break; + case eltwise::ABS: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + for (int32_t i = 0; i < image_size; ++i) { + output[i] = base::fabs(input0[i]); + } + } + } + break; + case eltwise::EQUAL: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + const T *in0_ptr = input0 + ((b * channel) + c) * image_size; + const T *in1_ptr = input1 + (batch1 > 1 ? 
b * channel : 0); + DstType *out_ptr = output + ((b * channel) + c) * image_size; + for (int32_t i = 0; i < image_size; ++i) { + out_ptr[i] = in0_ptr[i] == in1_ptr[c]; + } + } + } + break; + case eltwise::SIGN: + for (int32_t b = 0; b < batch0; ++b) { + for (int32_t c = 0; c < channel; ++c) { + for (int32_t i = 0; i < image_size; ++i) { + output[i] = eltwise::Sign(input0[i]); + } + } + } + break; + default:LOG(FATAL) << "Eltwise op not support type " + << static_cast(type); + } + } + + private: + const T *input0_; + const int32_t *input0_dims_; + uint32_t input0_dim_size_; + + const T *input1_; + const int32_t *input1_dims_; + uint32_t input1_dim_size_; + + T *output_; + + eltwise::Type type_; + const float *coeff_; + uint32_t coeff_size_; + T scalar_input_; + int32_t scalar_input_index_; + bool nchw_; + + MACE_OP_INPUT_TAGS(INPUT0, INPUT1); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_ELTWISE_H_ diff --git a/micro/ops/expand_dims.cc b/micro/ops/expand_dims.cc new file mode 100644 index 00000000..5702d470 --- /dev/null +++ b/micro/ops/expand_dims.cc @@ -0,0 +1,63 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/expand_dims.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/argument.h" + +namespace micro { +namespace ops { + +MaceStatus ExpandDimsOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData(OUTPUT); + axis_ = GetArgByName("axis", static_cast(0)); + if (axis_ < 0) { + axis_ += input_dim_size_ + 1; + } + MACE_ASSERT2(axis_ >= 0 && axis_ <= static_cast(input_dim_size_), + "axis is out of bound: ", axis_); + + return MACE_SUCCESS; +} + +MaceStatus ExpandDimsOp::Run() { + int32_t output_dim_size = input_dim_size_ + 1; + int32_t *output_dims = + ScratchBuffer(engine_config_).GetBuffer(output_dim_size); + + for (int32_t i = 0; i < output_dim_size; ++i) { + if (i < axis_) { + output_dims[i] = input_dims_[i]; + } else if (i == axis_) { + output_dims[i] = 1; + } else { + output_dims[i] = input_dims_[i - 1]; + } + } + + // TODO(luxuhui): optimize this method by reusing buffer + int32_t input_data_size = base::GetShapeSize(input_dim_size_, input_dims_); + base::memcpy(output_, input_, input_data_size * sizeof(mifloat)); + return ResizeOutputShape(OUTPUT, output_dim_size, output_dims); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/expand_dims.h b/micro/ops/expand_dims.h new file mode 100644 index 00000000..60b039f3 --- /dev/null +++ b/micro/ops/expand_dims.h @@ -0,0 +1,44 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_EXPAND_DIMS_H_ +#define MICRO_OPS_EXPAND_DIMS_H_ + +#include "micro/base/types.h" +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class ExpandDimsOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + int32_t axis_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_EXPAND_DIMS_H_ diff --git a/micro/ops/matmul.cc b/micro/ops/matmul.cc new file mode 100644 index 00000000..09caeff3 --- /dev/null +++ b/micro/ops/matmul.cc @@ -0,0 +1,163 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/matmul.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/argument.h" + +namespace micro { +namespace ops { + +MaceStatus MatMulOp::OnInit() { + transpose_a_ = GetArgByName("transpose_a", false); + transpose_b_ = GetArgByName("transpose_b", false); + input_a_ = GetInputData(INPUT_A); + input_b_ = GetInputData(INPUT_B); + bias_ = GetInputSize() > 3 ? GetInputData(BIAS) : NULL; + output_ = GetOutputData(OUTPUT); + + input_a_dim_size_ = GetInputShapeDimSize(INPUT_A); + input_b_dim_size_ = GetInputShapeDimSize(INPUT_B); + + input_a_dims_ = GetInputShapeDims(INPUT_A); + input_b_dims_ = GetInputShapeDims(INPUT_B); + + MACE_ASSERT1(input_a_dim_size_ >= 2 && input_b_dim_size_ >= 2, + "rank should be greater than or equal to 2"); + + return MACE_SUCCESS; +} + +MaceStatus MatMulOp::Run() { + MACE_ASSERT(Validate()); + + const int32_t lhs_rank = input_a_dim_size_; + const int32_t lhs_rows = input_a_dims_[lhs_rank - 2]; + const int32_t lhs_cols = input_a_dims_[lhs_rank - 1]; + const int32_t rhs_rank = input_b_dim_size_; + const int32_t rhs_rows = input_b_dims_[rhs_rank - 2]; + const int32_t rhs_cols = input_b_dims_[rhs_rank - 1]; + + const int32_t rows = transpose_a_ ? lhs_cols : lhs_rows; + const int32_t cols = transpose_b_ ? rhs_rows : rhs_cols; + const int32_t depth = transpose_a_ ? 
lhs_rows : lhs_cols; + const int32_t lhs_batch = + base::accumulate_multi(input_a_dims_, 0, input_a_dim_size_ - 2); + const int32_t rhs_batch = + base::accumulate_multi(input_b_dims_, 0, input_b_dim_size_ - 2); + int32_t *output_dims = + ScratchBuffer(engine_config_).GetBuffer(input_a_dim_size_); + + int32_t batch = 1; + base::memcpy(output_dims, input_a_dims_, input_a_dim_size_); + if (lhs_rank >= rhs_rank) { + output_dims[lhs_rank - 2] = rows; + output_dims[lhs_rank - 1] = cols; + batch = lhs_batch; + } else { + output_dims[rhs_rank - 2] = rows; + output_dims[rhs_rank - 1] = cols; + batch = rhs_batch; + } + bool lhs_batched = true; + bool rhs_batched = true; + if (lhs_rank < rhs_rank) { + lhs_batched = false; + } else if (rhs_rank < lhs_rank) { + rhs_batched = false; + } + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input_a_dim_size_, output_dims)); + + if (rows == 1 && transpose_b_) { + return gemv_.Compute(input_b_, + input_a_, + bias_, + batch, + cols, + depth, + rhs_batched, + lhs_batched, + output_); + } else if (cols == 1 && !transpose_a_) { + return gemv_.Compute(input_a_, + input_b_, + bias_, + batch, + rows, + depth, + lhs_batched, + rhs_batched, + output_); + } else { + MaceStatus ret = gemm_.Compute(input_a_, + input_b_, + batch, + lhs_rows, + lhs_cols, + rhs_rows, + rhs_cols, + transpose_a_, + transpose_b_, + false, + lhs_batched, + rhs_batched, + output_); + if (bias_ != NULL) { + MACE_ASSERT1(bias_dim_size_ == 1 && bias_dims_[0] == cols, + "bias' dim should be <= 2."); + for (int32_t i = 0; i < batch * rows; ++i) { + for (int32_t w = 0; w < cols; ++w) { + int32_t idx = i * cols + w; + output_[idx] = output_[idx] + bias_[w]; + } + } + } + + return ret; + } +} + +bool MatMulOp::Validate() { + const int32_t lhs_rank = input_a_dim_size_; + const int32_t rhs_rank = input_b_dim_size_; + if (input_a_dim_size_ == input_b_dim_size_) { + for (uint32_t i = 0; i < input_a_dim_size_ - 2; ++i) { + MACE_ASSERT1(input_a_dims_[i] == input_b_dims_[i], + "batch dimensions are not equal"); + } + } else { + MACE_ASSERT1(input_a_dim_size_ == 2 || input_b_dim_size_ == 2, + "Either lhs or rhs matrix should has rank 2 " + "for non-batched matrix multiplication"); + } + + int32_t lhs_depth = transpose_a_ ? input_a_dims_[lhs_rank - 2] : + input_a_dims_[lhs_rank - 1]; + int32_t rhs_depth = transpose_b_ ? input_b_dims_[rhs_rank - 1] : + input_b_dims_[rhs_rank - 2]; + if (lhs_depth != rhs_depth) { + MACE_ASSERT1(false, "the number of A's column must be equal to B's row "); + return false; + } + + return true; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/matmul.h b/micro/ops/matmul.h new file mode 100644 index 00000000..000adc53 --- /dev/null +++ b/micro/ops/matmul.h @@ -0,0 +1,63 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
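
For reference, the batched-MatMul shape rule that MatMulOp::Run applies can be written as a standalone sketch. This is a hypothetical helper for illustration only, not part of the MACE Micro API, and it assumes the lhs carries the batch dimensions (lhs_rank >= rhs_rank >= 2), which is the common case handled above.

// Sketch of the batched-MatMul output-shape rule (illustrative only).
#include <stdint.h>

void MatMulOutputShape(const int32_t *lhs_dims, int32_t lhs_rank,
                       const int32_t *rhs_dims, int32_t rhs_rank,
                       bool transpose_a, bool transpose_b,
                       int32_t *out_dims) {  // out_dims holds lhs_rank entries
  for (int32_t i = 0; i < lhs_rank - 2; ++i) {
    out_dims[i] = lhs_dims[i];  // leading (batch) dimensions come from the lhs
  }
  out_dims[lhs_rank - 2] =
      transpose_a ? lhs_dims[lhs_rank - 1] : lhs_dims[lhs_rank - 2];  // rows
  out_dims[lhs_rank - 1] =
      transpose_b ? rhs_dims[rhs_rank - 2] : rhs_dims[rhs_rank - 1];  // cols
}

When the result degenerates to a vector (rows == 1 with transpose_b, or cols == 1 without transpose_a), Run dispatches to Gemv instead of Gemm, which is why the first fast path passes input_b_ as the matrix operand.
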
+ +#ifndef MICRO_OPS_MATMUL_H_ +#define MICRO_OPS_MATMUL_H_ + +#include "micro/framework/operator.h" + +#include "micro/ops/utils/gemv.h" +#include "micro/ops/utils/gemm.h" + +namespace micro { +namespace ops { +class MatMulOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + bool Validate(); + + private: + const mifloat *input_a_; + const int32_t *input_a_dims_; + uint32_t input_a_dim_size_; + + const mifloat *input_b_; + const int32_t *input_b_dims_; + uint32_t input_b_dim_size_; + + const mifloat *bias_; +#ifndef NDEBUG + const int32_t *bias_dims_; + uint32_t bias_dim_size_; +#endif + + mifloat *output_; + + bool transpose_a_; + bool transpose_b_; + + Gemv gemv_; + Gemm gemm_; + + MACE_OP_INPUT_TAGS(INPUT_A, INPUT_B, BIAS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_MATMUL_H_ diff --git a/micro/ops/nhwc/base/conv_2d_base.cc b/micro/ops/nhwc/base/conv_2d_base.cc new file mode 100644 index 00000000..cb41a7f4 --- /dev/null +++ b/micro/ops/nhwc/base/conv_2d_base.cc @@ -0,0 +1,78 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/base/conv_2d_base.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/include/utils/macros.h" +#include "micro/model/operator_def.h" +#include "micro/ops/utils/crumb_utils.h" + +namespace micro { +namespace ops { + +MaceStatus Conv2dBase::OnInit() { + MACE_ASSERT1(static_cast( + GetArgByName("data_format", + static_cast(NHWC))) + != NCHW, "Only support NHWC"); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + filter_ = GetInputData(FILTER); + filter_dims_ = GetInputShapeDims(FILTER); + filter_dim_size_ = GetInputShapeDimSize(FILTER); + + if (GetInputSize() >= 3) { + bias_ = GetInputData(BIAS); + bias_dims_ = GetInputShapeDims(BIAS); + bias_dim_size_ = GetInputShapeDimSize(BIAS); + } else { + bias_ = NULL; + } + + output_ = GetOutputData(OUTPUT); + + MACE_RETURN_IF_ERROR(activation_.Init(this)); + + return FilterOpBase::OnInitBase(); +} + +MaceStatus Conv2dBase::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + if (bias_ != NULL) { + MACE_RETURN_IF_ERROR(crumb::ComputeBias( + output_, output_dims, input_dim_size_, bias_, bias_dims_[0], output_)); + } + MACE_RETURN_IF_ERROR(activation_.Compute( + output_, base::GetShapeSize(input_dim_size_, output_dims), output_)); + + return MACE_SUCCESS; +} + +MaceStatus Conv2dBase::Compute(int32_t (&output_dims)[4]) { + MACE_NOT_IMPLEMENTED; + MACE_UNUSED(output_dims); + return MACE_RUNTIME_ERROR; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/base/conv_2d_base.h b/micro/ops/nhwc/base/conv_2d_base.h new file mode 100644 index 
00000000..6c29fb84 --- /dev/null +++ b/micro/ops/nhwc/base/conv_2d_base.h @@ -0,0 +1,55 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_ +#define MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_ + +#include "micro/ops/nhwc/base/filter_op_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class Conv2dBase : public FilterOpBase { + public: + virtual MaceStatus OnInit(); + virtual MaceStatus Run(); + + protected: + virtual MaceStatus Compute(int32_t (&output_dims)[4]); + + protected: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + const mifloat *filter_; + const int32_t *filter_dims_; + uint32_t filter_dim_size_; + + const mifloat *bias_; + const int32_t *bias_dims_; + uint32_t bias_dim_size_; + + mifloat *output_; + Activation activation_; + + MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_BASE_CONV_2D_BASE_H_ diff --git a/micro/ops/nhwc/base/depthwise_conv_2d_base.cc b/micro/ops/nhwc/base/depthwise_conv_2d_base.cc new file mode 100644 index 00000000..0479060b --- /dev/null +++ b/micro/ops/nhwc/base/depthwise_conv_2d_base.cc @@ -0,0 +1,44 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
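
The structure above follows a template-method pattern: Conv2dBase owns shape inference, bias addition and activation, and each concrete kernel only overrides Compute. The following sketch uses simplified stand-in names to show the shape of that pattern; it is not the real MACE Micro class hierarchy.

// Minimal sketch of the pattern Conv2dBase sets up (stand-in names only).
struct Conv2dLikeBase {
  virtual ~Conv2dLikeBase() {}
  int Run() {
    int out_dims[4] = {0};
    InferOutputShape(out_dims);         // FilterOpBase::InitPaddingAndOutputSize
    const int ret = Compute(out_dims);  // kernel-specific inner loops
    ApplyBiasAndActivation(out_dims);   // shared post-processing
    return ret;
  }
 protected:
  virtual int Compute(int (&out_dims)[4]) = 0;  // ref, c2_s4, c3_s4, c4_s4 ...
  void InferOutputShape(int (&out_dims)[4]) { (void)out_dims; }
  void ApplyBiasAndActivation(const int (&out_dims)[4]) { (void)out_dims; }
};

The base Compute therefore only exists as a MACE_NOT_IMPLEMENTED placeholder; a usable operator is always one of the specialized subclasses.
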
+ +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/model/operator_def.h" +#include "micro/ops/utils/crumb_utils.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dBase::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + output_dims[3] *= input_dims_[3]; + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + if (bias_ != NULL) { + MACE_RETURN_IF_ERROR(crumb::ComputeBias( + output_, output_dims, input_dim_size_, bias_, bias_dims_[0], output_)); + } + MACE_RETURN_IF_ERROR(activation_.Compute( + output_, base::GetShapeSize(input_dim_size_, output_dims), output_)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/base/depthwise_conv_2d_base.h b/micro/ops/nhwc/base/depthwise_conv_2d_base.h new file mode 100644 index 00000000..1f512e9a --- /dev/null +++ b/micro/ops/nhwc/base/depthwise_conv_2d_base.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_ +#define MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class DepthwiseConv2dBase : public Conv2dBase { + public: + MaceStatus Run(); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_BASE_DEPTHWISE_CONV_2D_BASE_H_ diff --git a/micro/ops/nhwc/base/filter_op_base.cc b/micro/ops/nhwc/base/filter_op_base.cc new file mode 100644 index 00000000..191fc132 --- /dev/null +++ b/micro/ops/nhwc/base/filter_op_base.cc @@ -0,0 +1,156 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
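
A small worked example of the channel bookkeeping in DepthwiseConv2dBase::Run above. The depthwise filter is stored as [multiplier, k_height, k_width, in_channels], so the padding helper first writes the multiplier into output_dims[3], and Run then scales it by the input channel count. The helper below is illustrative only, not part of the op.

// Depthwise output channels = multiplier * in_channels (illustrative helper).
#include <stdint.h>

int32_t DepthwiseOutputChannels(int32_t filter_multiplier, int32_t in_channels) {
  return filter_multiplier * in_channels;  // e.g. 2 * 8 = 16 output channels
}
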
+ +#include "micro/ops/nhwc/base/filter_op_base.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/model/argument.h" + +namespace micro { +namespace ops { + +MaceStatus FilterOpBase::OnInitBase() { + strides_ = GetRepeatArgByName("strides"); + MACE_ASSERT(strides_ != NULL); + + const int32_t *dilations = GetRepeatArgByName("dilations"); + if (dilations == NULL) { + dilations_[0] = dilations_[1] = 1; + } else { + base::memcpy(dilations_, dilations, 2 * sizeof(int32_t)); + } + + const int32_t *padding_sizes = GetRepeatArgByName("padding_values"); + if (padding_sizes == NULL) { + padding_type_ = static_cast(GetArgByName( + "padding", static_cast(SAME))); + } else { + padding_type_ = NONE; + base::memcpy(padding_sizes_, padding_sizes, 2 * sizeof(int32_t)); + } + + return MACE_SUCCESS; +} + +void FilterOpBase::InitPaddingAndOutputSize(const int32_t *input_dims, + const int32_t *filter_dims, + const RoundType round_type, + int32_t *output_dims) { + if (padding_type_ != NONE) { + CalcPaddingAndOutputSize(input_dims, filter_dims, output_dims); + } else { + CalcOutputSizeWithPaddingSize( + input_dims, filter_dims, round_type, output_dims); + } +} + +void FilterOpBase::CalcPaddingAndOutputSize(const int32_t *input_dims, + const int32_t *filter_dims, + int32_t *output_dims) { + MACE_ASSERT1(dilations_[0] > 0 && dilations_[1] > 0, + "Invalid dilations, must >= 1"); + MACE_ASSERT1((dilations_[0] == 1 || strides_[0] == 1) && + (dilations_[1] == 1 || strides_[1] == 1), + "If dilations > 1, strides should be 1"); + MACE_ASSERT(output_dims != NULL); + + int32_t input_height = input_dims[1]; + int32_t input_width = input_dims[2]; + int32_t kernel_height = filter_dims[1]; + int32_t kernel_width = filter_dims[2]; + /* + * Convlution/pooling arithmetic: + * o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1 + * For details, see https://arxiv.org/pdf/1603.07285.pdf or + * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html + */ + int32_t output_height = 0, output_width = 0; + int32_t output_channels = filter_dims[0]; + int32_t k_extent_height = (kernel_height - 1) * dilations_[0] + 1; + int32_t k_extent_width = (kernel_width - 1) * dilations_[1] + 1; + + switch (padding_type_) { + case VALID: { + output_height = (input_height - k_extent_height) / strides_[0] + 1; + output_width = (input_width - k_extent_width) / strides_[1] + 1; + break; + } + case SAME: { + output_height = (input_height - 1) / strides_[0] + 1; + output_width = (input_width - 1) / strides_[1] + 1; + break; + } + case FULL: { + output_height = (input_height + k_extent_height - 2) / strides_[0] + 1; + output_width = (input_width + k_extent_width - 2) / strides_[1] + 1; + break; + } + default: { + MACE_ASSERT2(false, "Unsupported padding type: ", + static_cast(padding_type_)); + break; + } + } + + padding_sizes_[0] = base::max( + 0, (output_height - 1) * strides_[0] + k_extent_height - input_height); + padding_sizes_[1] = base::max( + 0, (output_width - 1) * strides_[1] + k_extent_width - input_width); + + output_dims[0] = input_dims[0]; + output_dims[1] = output_height; + output_dims[2] = output_width; + output_dims[3] = output_channels; +} + +void FilterOpBase::CalcOutputSizeWithPaddingSize(const int32_t *input_dims, + const int32_t *filter_dims, + const RoundType round_type, + int32_t *output_dims) { + MACE_ASSERT1(dilations_[0] > 0 && dilations_[1] > 0, + "Invalid dilations, must >= 1"); + MACE_ASSERT1((dilations_[0] == 1 || strides_[0] == 1) && + (dilations_[1] == 1 || strides_[1] == 1), + "If 
dilations > 1, strides should be 1"); + MACE_ASSERT(output_dims != NULL); + + int32_t input_height = input_dims[1]; + int32_t input_width = input_dims[2]; + int32_t kernel_height = filter_dims[1]; + int32_t kernel_width = filter_dims[2]; + + int32_t output_channels = filter_dims[0]; + float output_h_f = input_height + padding_sizes_[0] + padding_sizes_[0] + - (kernel_height - 1) * dilations_[0] - 1; + float output_w_f = input_width + padding_sizes_[1] + padding_sizes_[1] + - (kernel_width - 1) * dilations_[1] - 1; + int32_t output_height = 1; + int32_t output_width = 1; + if (round_type == FLOOR) { + output_height += static_cast(output_h_f / strides_[0]); + output_width += static_cast(output_w_f / strides_[1]); + } else { + output_height += base::ceil(output_h_f / strides_[0]); + output_width += base::ceil(output_w_f / strides_[1]); + } + + output_dims[0] = input_dims[0]; + output_dims[1] = output_height; + output_dims[2] = output_width; + output_dims[3] = output_channels; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/base/filter_op_base.h b/micro/ops/nhwc/base/filter_op_base.h new file mode 100644 index 00000000..d1e62b62 --- /dev/null +++ b/micro/ops/nhwc/base/filter_op_base.h @@ -0,0 +1,66 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_ +#define MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +enum Padding { + VALID = 0, // No padding + SAME = 1, // Pads with half the filter size (rounded down) on both sides + FULL = 2, // Pads with one less than the filter size on both sides + + NONE, +}; + +enum RoundType { + FLOOR = 0, + CEIL = 1, +}; + +class FilterOpBase : public framework::Operator { + public: + MaceStatus OnInitBase(); + + protected: + void InitPaddingAndOutputSize(const int32_t *input_dims, + const int32_t *filter_dims, + const RoundType round_type, + int32_t *output_dims); + + private: + void CalcPaddingAndOutputSize(const int32_t *input_dims, + const int32_t *filter_dims, + int32_t *output_dims); + void CalcOutputSizeWithPaddingSize(const int32_t *input_dims, + const int32_t *filter_dims, + const RoundType round_type, + int32_t *output_dims); + + protected: + Padding padding_type_; + const int32_t *strides_; + int32_t padding_sizes_[2]; + int32_t dilations_[2]; +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_BASE_FILTER_OP_BASE_H_ diff --git a/micro/ops/nhwc/base/pooling_base.cc b/micro/ops/nhwc/base/pooling_base.cc new file mode 100644 index 00000000..665c7b16 --- /dev/null +++ b/micro/ops/nhwc/base/pooling_base.cc @@ -0,0 +1,95 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/base/pooling_base.h" + +#include "micro/base/logging.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/nhwc/base/filter_op_base.h" + +namespace micro { +namespace ops { + +MaceStatus PoolingBase::OnInit() { + MACE_ASSERT1(static_cast( + GetArgByName("data_format", + static_cast(NHWC))) + != NCHW, "Only support NHWC"); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData(OUTPUT); + output_dims_ = GetOutputShapeDims(OUTPUT); + output_dim_size_ = GetOutputShapeDimSize(OUTPUT); + + kernel_ = GetRepeatArgByName("kernels"); + MACE_ASSERT(kernel_ != NULL); + int32_t pooling_type = + GetArgByName("pooling_type", static_cast(AVG)); + pooling_type_ = static_cast(pooling_type); + int32_t round_type = GetArgByName("round_mode", static_cast(FLOOR)); + round_type_ = static_cast(round_type); + + filter_dims_[0] = filter_dims_[3] = input_dims_[3]; + filter_dims_[1] = kernel_[0]; + filter_dims_[2] = kernel_[1]; + + return FilterOpBase::OnInitBase(); +} + +MaceStatus PoolingBase::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, round_type_, output_dims); + ResizeOutputShape(OUTPUT, 4, output_dims); + + int32_t pad_hw[2] = {padding_sizes_[0] / 2, padding_sizes_[1] / 2}; + if (pooling_type_ == MAX) { + MaxPooling(input_, kernel_, strides_, dilations_, pad_hw); + } else if (pooling_type_ == AVG) { + AvgPooling(input_, kernel_, strides_, dilations_, pad_hw); + } else { + MACE_NOT_IMPLEMENTED; + } + return MACE_SUCCESS; +} + +void PoolingBase::MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(input); + MACE_UNUSED(filter_hw); + MACE_UNUSED(stride_hw); + MACE_UNUSED(dilation_hw); + MACE_UNUSED(pad_hw); + MACE_NOT_IMPLEMENTED; +} + +void PoolingBase::AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(input); + MACE_UNUSED(filter_hw); + MACE_UNUSED(stride_hw); + MACE_UNUSED(dilation_hw); + MACE_UNUSED(pad_hw); + MACE_NOT_IMPLEMENTED; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/base/pooling_base.h b/micro/ops/nhwc/base/pooling_base.h new file mode 100644 index 00000000..4b4b4148 --- /dev/null +++ b/micro/ops/nhwc/base/pooling_base.h @@ -0,0 +1,63 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
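
A worked example of the output-size arithmetic that FilterOpBase provides for both the convolution and pooling ops. The helper below is illustrative only; with the general relation o = (i + 2p - k - (k - 1)(d - 1)) / s + 1, SAME padding first picks o = ceil(i / s) and then derives the padding, while VALID uses no padding. For i = 7, k = 3, s = 2, d = 1: SAME gives o = (7 - 1) / 2 + 1 = 4 with pad_total = (4 - 1) * 2 + 3 - 7 = 2, and VALID gives o = (7 - 3) / 2 + 1 = 3 with pad_total = 0.

// Output size for a given total padding (illustrative helper).
#include <stdint.h>

int32_t ConvOutputSize(int32_t in, int32_t k, int32_t stride,
                       int32_t dilation, int32_t pad_total) {
  const int32_t k_extent = (k - 1) * dilation + 1;
  return (in + pad_total - k_extent) / stride + 1;  // floor division
}
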
+ +#ifndef MICRO_OPS_NHWC_BASE_POOLING_BASE_H_ +#define MICRO_OPS_NHWC_BASE_POOLING_BASE_H_ + +#include "micro/model/output_shape.h" +#include "micro/ops/nhwc/base/filter_op_base.h" + +namespace micro { +namespace ops { + +enum PoolingType { + AVG = 1, // avg_pool + MAX = 2, // max_pool +}; + +class PoolingBase : public FilterOpBase { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + protected: + virtual void MaxPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); + virtual void AvgPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); + + protected: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + const int32_t *output_dims_; + uint32_t output_dim_size_; + + const int32_t *kernel_; + int32_t filter_dims_[4]; + RoundType round_type_; + PoolingType pooling_type_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_BASE_POOLING_BASE_H_ diff --git a/micro/ops/nhwc/batch_norm.cc b/micro/ops/nhwc/batch_norm.cc new file mode 100644 index 00000000..66eabc4d --- /dev/null +++ b/micro/ops/nhwc/batch_norm.cc @@ -0,0 +1,92 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/batch_norm.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus BatchNormOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + scale_ = GetInputData(SCALE); + scale_dims_ = GetInputShapeDims(SCALE); + scale_dim_size_ = GetInputShapeDimSize(SCALE); + + offset_ = GetInputData(OFFSET); + offset_dims_ = GetInputShapeDims(OFFSET); + offset_dim_size_ = GetInputShapeDimSize(OFFSET); + + output_ = GetOutputData(OUTPUT); + + MACE_ASSERT(input_dim_size_ >= 1); + MACE_ASSERT1(scale_dim_size_ == 1, "scale must be 1-dimensional. "); + MACE_ASSERT1(offset_dim_size_ == 1, "offset must be 1-dimensional. "); + + epsilon_ = GetArgByName("epsilon", static_cast(1e-4)); + + MACE_RETURN_IF_ERROR(activation_.Init(this)); + + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + + return MACE_SUCCESS; +} + +MaceStatus BatchNormOp::Run() { + const mifloat *scale = scale_; + const mifloat *offset = offset_; + const uint32_t input_dim_end_idx = input_dim_size_ - 1; + const int32_t channels = input_dims_[input_dim_end_idx]; + const int32_t batch = + base::accumulate_multi(input_dims_, 0, input_dim_end_idx); + if (GetInputSize() == 5) { + const float *mean = GetInputData(MEAN); + const float *var = GetInputData(VAR); + + MACE_ASSERT1(GetInputShapeDimSize(MEAN) == 1, + "mean must be 1-dimensional. 
"); + MACE_ASSERT1(GetInputShapeDimSize(VAR) == 1, "var must be 1-dimensional. "); + + ScratchBuffer scratch_buffer(engine_config_); + mifloat *new_scale = scratch_buffer.GetBuffer(channels); + mifloat *new_offset = scratch_buffer.GetBuffer(channels); + for (int32_t c = 0; c < channels; ++c) { + new_scale[c] = scale_[c] / base::sqrt(var[c] + epsilon_); + new_offset[c] = offset_[c] - mean[c] * new_scale[c]; + } + scale = new_scale; + offset = new_offset; + } + + for (int32_t b = 0; b < batch; ++b) { + const int32_t batch_base = b * channels; + for (int32_t c = 0; c < channels; ++c) { + output_[batch_base + c] = + input_[batch_base + c] * scale[c] + offset[c]; + } // c + } // b + + MACE_RETURN_IF_ERROR(activation_.Compute(output_, batch * channels, output_)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/batch_norm.h b/micro/ops/nhwc/batch_norm.h new file mode 100644 index 00000000..8232cb2a --- /dev/null +++ b/micro/ops/nhwc/batch_norm.h @@ -0,0 +1,54 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_BATCH_NORM_H_ +#define MICRO_OPS_NHWC_BATCH_NORM_H_ + +#include "micro/framework/operator.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { +class BatchNormOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + const mifloat *scale_; + const int32_t *scale_dims_; + uint32_t scale_dim_size_; + + const mifloat *offset_; + const int32_t *offset_dims_; + uint32_t offset_dim_size_; + + mifloat *output_; + + float epsilon_; + + Activation activation_; + + MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_BATCH_NORM_H_ diff --git a/micro/ops/nhwc/conv_2d_c2_s4.cc b/micro/ops/nhwc/conv_2d_c2_s4.cc new file mode 100644 index 00000000..f1769260 --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c2_s4.cc @@ -0,0 +1,166 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/conv_2d_c2_s4.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus Conv2dC2S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + MACE_ASSERT(channel == 2); + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size_end; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + for (int32_t kb = 0; kb < channel; kb += 2) { + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + float output[2 * 4] = {0}; + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= 
in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t in_w_base[4] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output[0] += input0 * filter0; + output[1] += input0 * filter1; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output[2] += input1 * filter0; + output[3] += input1 * filter1; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output[4] += input2 * filter0; + output[5] += input2 * filter1; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output[6] += input3 * filter0; + output[7] += input3 * filter1; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + int32_t out_idx = width_base[i] + kb; + int32_t buf_idx = i * 2; + output_[out_idx] = output[buf_idx]; + output_[out_idx + 1] = output[buf_idx + 1]; + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/conv_2d_c2_s4.h b/micro/ops/nhwc/conv_2d_c2_s4.h new file mode 100644 index 00000000..30625397 --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c2_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CONV_2D_C2_S4_H_ +#define MICRO_OPS_NHWC_CONV_2D_C2_S4_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" + +namespace micro { +namespace ops { + +class Conv2dC2S4Op : public Conv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CONV_2D_C2_S4_H_ diff --git a/micro/ops/nhwc/conv_2d_c3_s4.cc b/micro/ops/nhwc/conv_2d_c3_s4.cc new file mode 100644 index 00000000..4101a023 --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c3_s4.cc @@ -0,0 +1,174 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
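
The blocked Conv2dCxS4 kernels above flatten the spatial loop over batch * height * width and process four output positions (and two or more output channels) per iteration, keeping the partial sums in a small local array. Recovering (b, h, w) from the flat index s works as in the illustrative helper below; clamping s to size - 4 at the tail simply recomputes a few already-finished positions instead of adding a scalar epilogue.

// Mapping a flat NHW index back to (b, h, w) (illustrative helper).
#include <stdint.h>

void FlatToBHW(int32_t s, int32_t height, int32_t width,
               int32_t *b, int32_t *h, int32_t *w) {
  *w = s % width;
  *h = (s / width) % height;
  *b = s / (width * height);
}
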
+ +#include "micro/ops/nhwc/conv_2d_c3_s4.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus Conv2dC3S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + MACE_ASSERT(channel == 3); + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size_end; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + for (int32_t kb = 0; kb < channel; kb += 3) { + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + const int32_t k_batch_base2 = k_batch_base1 + k_height; + float output[3 * 4] = {0}; + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = 
false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel; + const int32_t in_w_base[4] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + float filter2 = filter_[k_width_base2 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output[0] += input0 * filter0; + output[1] += input0 * filter1; + output[2] += input0 * filter2; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output[3] += input1 * filter0; + output[4] += input1 * filter1; + output[5] += input1 * filter2; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output[6] += input2 * filter0; + output[7] += input2 * filter1; + output[8] += input2 * filter2; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output[9] += input3 * filter0; + output[10] += input3 * filter1; + output[11] += input3 * filter2; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + for (int32_t j = 0; j < 3; ++j) { + int32_t out_idx = width_base[i] + kb + j; + output_[out_idx] = output[i * 3 + j]; + } + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/conv_2d_c3_s4.h b/micro/ops/nhwc/conv_2d_c3_s4.h new file mode 100644 index 00000000..27ff90fb --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c3_s4.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CONV_2D_C3_S4_H_ +#define MICRO_OPS_NHWC_CONV_2D_C3_S4_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class Conv2dC3S4Op : public Conv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CONV_2D_C3_S4_H_ diff --git a/micro/ops/nhwc/conv_2d_c4_s4.cc b/micro/ops/nhwc/conv_2d_c4_s4.cc new file mode 100644 index 00000000..ca14440b --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c4_s4.cc @@ -0,0 +1,185 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/conv_2d_c4_s4.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus Conv2dC4S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + const int32_t channel_end = channel - 4; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size_end; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + for (int32_t kb = 0; kb < channel; kb += 4) { + if (kb > channel_end) { + kb = channel_end; + } + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + const int32_t k_batch_base2 = k_batch_base1 + k_height; + const int32_t k_batch_base3 = k_batch_base2 + k_height; + float output[4 * 4] = {0}; + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width; + const int32_t k_height_base3 = (k_batch_base3 + kh) * k_width; + const int32_t in_h_base0 = 
in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel; + const int32_t k_width_base3 = (k_height_base3 + kw) * k_channel; + const int32_t in_w_base[4] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + float filter2 = filter_[k_width_base2 + kc]; + float filter3 = filter_[k_width_base3 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output[0] += input0 * filter0; + output[1] += input0 * filter1; + output[2] += input0 * filter2; + output[3] += input0 * filter3; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output[4] += input1 * filter0; + output[5] += input1 * filter1; + output[6] += input1 * filter2; + output[7] += input1 * filter3; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output[8] += input2 * filter0; + output[9] += input2 * filter1; + output[10] += input2 * filter2; + output[11] += input2 * filter3; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output[12] += input3 * filter0; + output[13] += input3 * filter1; + output[14] += input3 * filter2; + output[15] += input3 * filter3; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + for (int32_t j = 0; j < 4; ++j) { + int32_t out_idx = width_base[i] + kb + j; + output_[out_idx] = output[i * 4 + j]; + } + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/conv_2d_c4_s4.h b/micro/ops/nhwc/conv_2d_c4_s4.h new file mode 100644 index 00000000..d868a618 --- /dev/null +++ b/micro/ops/nhwc/conv_2d_c4_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
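
The channel loop above uses the same tail strategy as the spatial loop: when fewer than four output channels remain, the block start is pulled back to channel - 4 so a full block is always processed and the overlapping channels are recomputed with identical results. A generic sketch of that "clamp to the last full block" idea, assuming the total count is at least 4:

// "Clamp to the last full block" tail handling (illustrative sketch).
#include <stdint.h>

void ProcessBlocked(int32_t total, void (*process4)(int32_t start)) {
  const int32_t last_start = total - 4;  // assumes total >= 4
  for (int32_t i = 0; i < total; i += 4) {
    if (i > last_start) {
      i = last_start;                    // overlap instead of a scalar epilogue
    }
    process4(i);
  }
}
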
+ +#ifndef MICRO_OPS_NHWC_CONV_2D_C4_S4_H_ +#define MICRO_OPS_NHWC_CONV_2D_C4_S4_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" + +namespace micro { +namespace ops { + +class Conv2dC4S4Op : public Conv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CONV_2D_C4_S4_H_ diff --git a/micro/ops/nhwc/conv_2d_ref.cc b/micro/ops/nhwc/conv_2d_ref.cc new file mode 100644 index 00000000..542cb3a7 --- /dev/null +++ b/micro/ops/nhwc/conv_2d_ref.cc @@ -0,0 +1,79 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/conv_2d_ref.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus Conv2dRefOp::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(filter_dims_[0] == channel && input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + for (int32_t b = 0; b < batch; ++b) { + const int32_t batch_base = b * height; + for (int32_t h = 0; h < height; ++h) { + const int32_t height_base = (batch_base + h) * width; + const int32_t in_h = h * strides_[0] - pad_top; + for (int32_t w = 0; w < width; ++w) { + const int32_t width_base = (height_base + w) * channel; + const int32_t in_w = w * strides_[1] - pad_left; + for (int32_t kb = 0; kb < channel; ++kb) { + const int32_t o_idx = width_base + kb; + const int32_t k_batch_base = kb * k_height; + float output = 0; + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx = in_h + kh * dilations_[0]; + if (in_h_idx < 0 || in_h_idx >= in_height) { + continue; + } + const int32_t k_height_base = (k_batch_base + kh) * k_width; + const int32_t in_h_base = in_h_idx * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t in_w_idx = in_w + kw * dilations_[1]; + if (in_w_idx < 0 || in_w_idx >= in_width) { + continue; + } + const int32_t k_width_base = (k_height_base + kw) * k_channel; + const int32_t in_w_base = (in_h_base + in_w_idx) * in_channel; + for (int32_t kc = 0; kc < k_channel; ++kc) { + output += input_[in_w_base + kc] * filter_[k_width_base + kc]; + } // filter channel + } // filter width + } // filter height + output_[o_idx] = output; + } // filter batch, output channel + } // output width + } // output height + } // output batch + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/conv_2d_ref.h b/micro/ops/nhwc/conv_2d_ref.h new file mode 100644 index 00000000..f9e91fa6 --- 
/dev/null +++ b/micro/ops/nhwc/conv_2d_ref.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CONV_2D_REF_H_ +#define MICRO_OPS_NHWC_CONV_2D_REF_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" + +namespace micro { +namespace ops { + +class Conv2dRefOp : public Conv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CONV_2D_REF_H_ diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.cc b/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.cc new file mode 100644 index 00000000..caf843bd --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.cc @@ -0,0 +1,161 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
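
The reference kernel above walks an NHWC input and an OHWI filter ([out_channels, k_height, k_width, in_channels]) with nested base offsets. The flat-index helpers below make that addressing explicit; they are illustrative only and not part of the op.

// Flat indices for the NHWC input and OHWI filter layouts (illustrative).
#include <stdint.h>

int32_t NhwcIndex(int32_t n, int32_t h, int32_t w, int32_t c,
                  int32_t H, int32_t W, int32_t C) {
  return ((n * H + h) * W + w) * C + c;
}

int32_t OhwiIndex(int32_t o, int32_t kh, int32_t kw, int32_t i,
                  int32_t KH, int32_t KW, int32_t I) {
  return ((o * KH + kh) * KW + kw) * I + i;
}
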
+ +#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dKB1S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(filter_dims_[0] == 1 && input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + + int32_t output_size = k_channel * 4; + float *output = ScratchBuffer(engine_config_).GetBuffer(output_size); + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size_end; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + + base::memset(output, 0.0f, output_size); + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base = kh * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 
|| in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base = (k_height_base + kw) * k_channel; + const int32_t in_w_base[] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float *output_kc = output + kc * 4; + float filter = filter_[k_width_base + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output_kc[0] += input0 * filter; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output_kc[1] += input1 * filter; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output_kc[2] += input2 * filter; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output_kc[3] += input3 * filter; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + int32_t out_base = width_base[i]; + for (int32_t c_offset = 0, kc_offset = 0; + c_offset < channel; ++c_offset, kc_offset += 4) { + output_[out_base + c_offset] = output[kc_offset + i]; + } + } + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h b/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h new file mode 100644 index 00000000..2660dfc5 --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_ +#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" + +namespace micro { +namespace ops { + +class DepthwiseConv2dKB1S4Op : public DepthwiseConv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB1_S4_H_ diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.cc b/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.cc new file mode 100644 index 00000000..909214ce --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.cc @@ -0,0 +1,181 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
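The KB1 kernel above flattens batch * height * width into a single index and computes four output pixels per step; when fewer than four pixels remain it clamps s back to size - 4, so the tail group simply recomputes a few already-written pixels instead of reading past the end (this presumes the output has at least four pixels). A minimal standalone sketch of that index math, not part of the patch and with illustrative names only:

#include <cstdint>
#include <cstdio>

// Walk batch * height * width output pixels in groups of four, the way
// DepthwiseConv2dKB1S4Op::Compute does, clamping the final partial group.
void ForEachPixelGroup(int32_t batch, int32_t height, int32_t width) {
  const int32_t size = batch * height * width;
  const int32_t size_end = size - 4;  // assumes size >= 4
  for (int32_t s = 0; s < size; s += 4) {
    if (s > size_end) {
      s = size_end;  // tail group overlaps the previous one
    }
    for (int32_t k = 0; k < 4; ++k) {
      const int32_t idx = s + k;
      const int32_t h = idx / width % height;  // output row
      const int32_t w = idx % width;           // output column
      std::printf("pixel %d -> (h=%d, w=%d)\n",
                  static_cast<int>(idx), static_cast<int>(h),
                  static_cast<int>(w));
    }
  }
}

int main() {
  ForEachPixelGroup(1, 3, 3);  // 9 pixels: groups 0..3, 4..7, then clamped 5..8
  return 0;
}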
+ +#include "micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dKB2S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_batch = filter_dims_[0]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + const int32_t k_batch_end = k_batch - 2; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size - 4; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + + for (int32_t kb = 0; kb < k_batch; kb += 2) { + if (kb > k_batch_end) { + kb = k_batch - 2; + } + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + int32_t output_size = k_channel * 8; + float *output = + ScratchBuffer(engine_config_).GetBuffer(output_size); + base::memset(output, 0.0f, output_size); + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const 
int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t in_w_base[] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float *output_kc = output + kc * 8; + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output_kc[0] += input0 * filter0; + output_kc[1] += input0 * filter1; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output_kc[2] += input1 * filter0; + output_kc[3] += input1 * filter1; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output_kc[4] += input2 * filter0; + output_kc[5] += input2 * filter1; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output_kc[6] += input3 * filter0; + output_kc[7] += input3 * filter1; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + for (int32_t j = 0; j < 2; ++j) { + int32_t out_base = width_base[i] + kb + j; + int32_t buf_offset = i * 2 + j; + for (int32_t c_offset = 0, kc_offset = 0; + c_offset < channel; c_offset += k_batch, kc_offset += 8) { + output_[out_base + c_offset] = output[kc_offset + buf_offset]; + } + } + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h b/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h new file mode 100644 index 00000000..bded6867 --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_ +#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" + +namespace micro { +namespace ops { + +class DepthwiseConv2dKB2S4Op : public DepthwiseConv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB2_S4_H_ diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.cc b/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.cc new file mode 100644 index 00000000..5aaf41ed --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.cc @@ -0,0 +1,189 @@ +// Copyright 2020 The MACE Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dKB3S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_batch = filter_dims_[0]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + const int32_t k_batch_end = k_batch - 3; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size - 4; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 = w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + + for (int32_t kb = 0; kb < k_batch; kb += 3) { + if (kb > k_batch_end) { + kb = k_batch - 3; + } + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + const int32_t k_batch_base2 = k_batch_base1 + k_height; + int32_t output_size = k_channel * 12; + float *output = + ScratchBuffer(engine_config_).GetBuffer(output_size); + base::memset(output, 0.0f, output_size); + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 
|| in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel; + const int32_t in_w_base[] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + for (int32_t kc = 0; kc < k_channel; ++kc) { + float *output_kc = output + kc * 12; + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + float filter2 = filter_[k_width_base2 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output_kc[0] += input0 * filter0; + output_kc[1] += input0 * filter1; + output_kc[2] += input0 * filter2; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output_kc[3] += input1 * filter0; + output_kc[4] += input1 * filter1; + output_kc[5] += input1 * filter2; + } + if (valid[2]) { + float input2 = input_[in_w_base[2] + kc]; + output_kc[6] += input2 * filter0; + output_kc[7] += input2 * filter1; + output_kc[8] += input2 * filter2; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output_kc[9] += input3 * filter0; + output_kc[10] += input3 * filter1; + output_kc[11] += input3 * filter2; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + for (int32_t j = 0; j < 3; ++j) { + int32_t out_base = width_base[i] + kb + j; + int32_t buf_offset = i * 3 + j; + for (int32_t c_offset = 0, kc_offset = 0; + c_offset < channel; c_offset += k_batch, kc_offset += 12) { + output_[out_base + c_offset] = output[kc_offset + buf_offset]; + } + } + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h b/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h new file mode 100644 index 00000000..91a753db --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
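In the KB2/KB3 kernels, the scratch buffer holds one partial sum per (output pixel, filter batch) pair for every filter channel, and the write-back loop above scatters entry i * KB + j of each channel group to output channel kb + j + kc * k_batch. A standalone sketch of the KB3 scatter, for illustration only; it assumes channel == k_channel * k_batch and kb + 3 <= k_batch, which the kernel's clamping of kb guarantees:

#include <cstdint>

// Scatter the KB3 scratch accumulator back into NHWC output. scratch is laid
// out as [k_channel][pixel 0..3][filter batch offset 0..2]; width_base[i] is
// the flat channel-0 offset of output pixel i.
void ScatterKB3(const float *scratch, int32_t k_channel, int32_t k_batch,
                int32_t kb, const int32_t width_base[4], float *output) {
  const int32_t channel = k_channel * k_batch;  // output channels
  for (int32_t i = 0; i < 4; ++i) {             // output pixel in the group
    for (int32_t j = 0; j < 3; ++j) {           // filter batch within the tile
      const int32_t out_base = width_base[i] + kb + j;
      const int32_t buf_offset = i * 3 + j;
      for (int32_t c_offset = 0, kc_offset = 0; c_offset < channel;
           c_offset += k_batch, kc_offset += 12) {
        output[out_base + c_offset] = scratch[kc_offset + buf_offset];
      }
    }
  }
}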
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_ +#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" + +namespace micro { +namespace ops { + +class DepthwiseConv2dKB3S4Op : public DepthwiseConv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB3_S4_H_ diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.cc b/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.cc new file mode 100644 index 00000000..3d805b91 --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.cc @@ -0,0 +1,199 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dKB4S4Op::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_batch = filter_dims_[0]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + const int32_t size = batch * height * width; + const int32_t size_end = size - 4; + const int32_t k_batch_end = k_batch - 4; + + for (int32_t s = 0; s < size; s += 4) { + if (s > size_end) { + s = size - 4; + } + int32_t h0 = s / width % height; + int32_t h1 = (s + 1) / width % height; + int32_t h2 = (s + 2) / width % height; + int32_t h3 = (s + 3) / width % height; + const int32_t in_h0 = h0 * strides_[0] - pad_top; + const int32_t in_h1 = h1 * strides_[0] - pad_top; + const int32_t in_h2 = h2 * strides_[0] - pad_top; + const int32_t in_h3 = h3 * strides_[0] - pad_top; + int32_t w0 = s % width; + int32_t w1 = (s + 1) % width; + int32_t w2 = (s + 2) % width; + int32_t w3 = (s + 3) % width; + + int32_t width_base[4] = {s * channel}; + width_base[1] = width_base[0] + channel; + width_base[2] = width_base[1] + channel; + width_base[3] = width_base[2] + channel; + const int32_t in_w0 = w0 * strides_[1] - pad_left; + const int32_t in_w1 
= w1 * strides_[1] - pad_left; + const int32_t in_w2 = w2 * strides_[1] - pad_left; + const int32_t in_w3 = w3 * strides_[1] - pad_left; + + for (int32_t kb = 0; kb < k_batch; kb += 4) { + if (kb > k_batch_end) { + kb = k_batch - 4; + } + const int32_t k_batch_base0 = kb * k_height; + const int32_t k_batch_base1 = k_batch_base0 + k_height; + const int32_t k_batch_base2 = k_batch_base1 + k_height; + const int32_t k_batch_base3 = k_batch_base2 + k_height; + int32_t output_size = k_channel * 16; + float *output = + ScratchBuffer(engine_config_).GetBuffer(output_size); + base::memset(output, static_cast(0.0f), output_size); + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx0 = in_h0 + kh * dilations_[0]; + const int32_t in_h_idx1 = in_h1 + kh * dilations_[0]; + const int32_t in_h_idx2 = in_h2 + kh * dilations_[0]; + const int32_t in_h_idx3 = in_h3 + kh * dilations_[0]; + + bool h_valid[4] = {true, true, true, true}; + if (in_h_idx0 < 0 || in_h_idx0 >= in_height) { + h_valid[0] = false; + } + if (in_h_idx1 < 0 || in_h_idx1 >= in_height) { + h_valid[1] = false; + } + if (in_h_idx2 < 0 || in_h_idx2 >= in_height) { + h_valid[2] = false; + } + if (in_h_idx3 < 0 || in_h_idx3 >= in_height) { + h_valid[3] = false; + } + const int32_t k_height_base0 = (k_batch_base0 + kh) * k_width; + const int32_t k_height_base1 = (k_batch_base1 + kh) * k_width; + const int32_t k_height_base2 = (k_batch_base2 + kh) * k_width; + const int32_t k_height_base3 = (k_batch_base3 + kh) * k_width; + const int32_t in_h_base0 = in_h_idx0 * in_width; + const int32_t in_h_base1 = in_h_idx1 * in_width; + const int32_t in_h_base2 = in_h_idx2 * in_width; + const int32_t in_h_base3 = in_h_idx3 * in_width; + + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t kw_dilations = kw * dilations_[1]; + const int32_t in_w_idx0 = in_w0 + kw_dilations; + const int32_t in_w_idx1 = in_w1 + kw_dilations; + const int32_t in_w_idx2 = in_w2 + kw_dilations; + const int32_t in_w_idx3 = in_w3 + kw_dilations; + + bool valid[4] = { + h_valid[0], h_valid[1], h_valid[2], h_valid[3] + }; + if (in_w_idx0 < 0 || in_w_idx0 >= in_width) { + valid[0] = false; + } + if (in_w_idx1 < 0 || in_w_idx1 >= in_width) { + valid[1] = false; + } + if (in_w_idx2 < 0 || in_w_idx2 >= in_width) { + valid[2] = false; + } + if (in_w_idx3 < 0 || in_w_idx3 >= in_width) { + valid[3] = false; + } + + const int32_t k_width_base0 = (k_height_base0 + kw) * k_channel; + const int32_t k_width_base1 = (k_height_base1 + kw) * k_channel; + const int32_t k_width_base2 = (k_height_base2 + kw) * k_channel; + const int32_t k_width_base3 = (k_height_base3 + kw) * k_channel; + const int32_t in_w_base[4] = { + (in_h_base0 + in_w_idx0) * in_channel, + (in_h_base1 + in_w_idx1) * in_channel, + (in_h_base2 + in_w_idx2) * in_channel, + (in_h_base3 + in_w_idx3) * in_channel + }; + + for (int32_t kc = 0; kc < k_channel; ++kc) { + float *output_kc = output + kc * 16; + float filter0 = filter_[k_width_base0 + kc]; + float filter1 = filter_[k_width_base1 + kc]; + float filter2 = filter_[k_width_base2 + kc]; + float filter3 = filter_[k_width_base3 + kc]; + if (valid[0]) { + float input0 = input_[in_w_base[0] + kc]; + output_kc[0] += input0 * filter0; + output_kc[1] += input0 * filter1; + output_kc[2] += input0 * filter2; + output_kc[3] += input0 * filter3; + } + if (valid[1]) { + float input1 = input_[in_w_base[1] + kc]; + output_kc[4] += input1 * filter0; + output_kc[5] += input1 * filter1; + output_kc[6] += input1 * filter2; + output_kc[7] += input1 * filter3; + } + if (valid[2]) 
{ + float input2 = input_[in_w_base[2] + kc]; + output_kc[8] += input2 * filter0; + output_kc[9] += input2 * filter1; + output_kc[10] += input2 * filter2; + output_kc[11] += input2 * filter3; + } + if (valid[3]) { + float input3 = input_[in_w_base[3] + kc]; + output_kc[12] += input3 * filter0; + output_kc[13] += input3 * filter1; + output_kc[14] += input3 * filter2; + output_kc[15] += input3 * filter3; + } + } // filter channel + } // filter width + } // filter height + for (int32_t i = 0; i < 4; ++i) { + for (int32_t j = 0; j < 4; ++j) { + int32_t out_base = width_base[i] + kb + j; + int32_t buf_offset = i * 4 + j; + for (int32_t c_offset = 0, kc_offset = 0; + c_offset < channel; c_offset += k_batch, kc_offset += 16) { + output_[out_base + c_offset] = output[kc_offset + buf_offset]; + } + } + } + } // filter batch, output channel + } // output size + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h b/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h new file mode 100644 index 00000000..faf8c3ae --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_ +#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" + +namespace micro { +namespace ops { + +class DepthwiseConv2dKB4S4Op : public DepthwiseConv2dBase { + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_KB4_S4_H_ diff --git a/micro/ops/nhwc/depthwise_conv_2d_ref.cc b/micro/ops/nhwc/depthwise_conv_2d_ref.cc new file mode 100644 index 00000000..24b49074 --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_ref.cc @@ -0,0 +1,80 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
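The KB4 variant above keeps 4 (output pixels) * 4 (filter batches) running sums per filter channel in the scratch buffer, 16 floats per channel, so every input value it loads is multiplied against four filter taps before the next load; that reuse is what the KBx kernels buy over the reference kernel. A standalone sketch of one inner step of the KB4 accumulation, illustration only (the real kernel indexes input_ and filter_ as shown above):

#include <cstdint>

// One inner step of the KB4 kernel for a single filter channel: four output
// pixels and four filter batches. Padded (invalid) pixels are skipped so
// their partial sums stay at zero.
void AccumulateKB4Step(const float input[4], const bool valid[4],
                       const float filter[4], float acc[16]) {
  for (int32_t p = 0; p < 4; ++p) {    // output pixel
    if (!valid[p]) {
      continue;
    }
    for (int32_t f = 0; f < 4; ++f) {  // filter batch (depth multiplier)
      acc[p * 4 + f] += input[p] * filter[f];
    }
  }
}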
+ +#include "micro/ops/nhwc/depthwise_conv_2d_ref.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus DepthwiseConv2dRefOp::Compute(int32_t (&output_dims)[4]) { + const int32_t batch = output_dims[0]; + const int32_t height = output_dims[1]; + const int32_t width = output_dims[2]; + const int32_t channel = output_dims[3]; + const int32_t k_batch = filter_dims_[0]; + const int32_t k_height = filter_dims_[1]; + const int32_t k_width = filter_dims_[2]; + const int32_t k_channel = filter_dims_[3]; + MACE_ASSERT(input_dims_[3] == k_channel); + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t in_channel = input_dims_[3]; + + const int32_t pad_top = padding_sizes_[0] >> 1; + const int32_t pad_left = padding_sizes_[1] >> 1; + + for (int32_t b = 0; b < batch; ++b) { + const int32_t batch_base = b * height; + for (int32_t h = 0; h < height; ++h) { + const int32_t height_base = (batch_base + h) * width; + const int32_t in_h = h * strides_[0] - pad_top; + for (int32_t w = 0; w < width; ++w) { + const int32_t width_base = (height_base + w) * channel; + const int32_t in_w = w * strides_[1] - pad_left; + for (int32_t oc = 0; oc < channel; ++oc) { + const int32_t kb = oc % k_batch; + const int32_t kc = oc / k_batch; + const int32_t o_idx = width_base + oc; + const int32_t k_batch_base = kb * k_height; + float output = 0; + for (int32_t kh = 0; kh < k_height; ++kh) { + const int32_t in_h_idx = in_h + kh * dilations_[0]; + if (in_h_idx < 0 || in_h_idx >= in_height) { + continue; + } + const int32_t k_height_base = (k_batch_base + kh) * k_width; + const int32_t in_h_base = in_h_idx * in_width; + for (int32_t kw = 0; kw < k_width; ++kw) { + const int32_t in_w_idx = in_w + kw * dilations_[1]; + if (in_w_idx < 0 || in_w_idx >= in_width) { + continue; + } + const int32_t k_width_base = (k_height_base + kw) * k_channel; + const int32_t in_w_base = (in_h_base + in_w_idx) * in_channel; + output += input_[in_w_base + kc] * filter_[k_width_base + kc]; + } // filter width + } // filter height + output_[o_idx] = output; + } // filter batch, output channel + } // output width + } // output height + } // output batch + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/depthwise_conv_2d_ref.h b/micro/ops/nhwc/depthwise_conv_2d_ref.h new file mode 100644 index 00000000..09fdd5ab --- /dev/null +++ b/micro/ops/nhwc/depthwise_conv_2d_ref.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
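The reference kernel above treats the depthwise filter as [multiplier, k_height, k_width, in_channels] and splits each output channel oc into kb = oc % k_batch (which multiplier) and kc = oc / k_batch (which input channel), so output[b, h, w, oc] accumulates input[b, in_h + kh * dilation, in_w + kw * dilation, kc] * filter[kb, kh, kw, kc]; this presumes channel == k_batch * in_channel. A tiny standalone check of that channel mapping (hypothetical helper, not part of the patch):

#include <cassert>
#include <cstdint>

// Output-channel decomposition used by DepthwiseConv2dRefOp.
struct DepthwiseChannel {
  int32_t kb;  // filter batch (depth multiplier index)
  int32_t kc;  // input / filter channel
};

DepthwiseChannel Decompose(int32_t oc, int32_t k_batch) {
  return DepthwiseChannel{oc % k_batch, oc / k_batch};
}

int main() {
  const int32_t k_batch = 2;    // depth multiplier (filter dim 0)
  const int32_t k_channel = 3;  // input channels   (filter dim 3)
  for (int32_t oc = 0; oc < k_batch * k_channel; ++oc) {
    const DepthwiseChannel d = Decompose(oc, k_batch);
    assert(d.kb >= 0 && d.kb < k_batch);
    assert(d.kc >= 0 && d.kc < k_channel);
  }
  return 0;
}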
+
+#ifndef MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
+#define MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
+
+#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
+
+namespace micro {
+namespace ops {
+
+class DepthwiseConv2dRefOp : public DepthwiseConv2dBase {
+ private:
+  MaceStatus Compute(int32_t (&output_dims)[4]);
+};
+
+}  // namespace ops
+}  // namespace micro
+
+#endif  // MICRO_OPS_NHWC_DEPTHWISE_CONV_2D_REF_H_
diff --git a/micro/ops/nhwc/pooling_ref.cc b/micro/ops/nhwc/pooling_ref.cc
new file mode 100644
index 00000000..270a7c0f
--- /dev/null
+++ b/micro/ops/nhwc/pooling_ref.cc
@@ -0,0 +1,130 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "micro/ops/nhwc/pooling_ref.h"
+
+#include "micro/base/logging.h"
+#include "micro/base/utils.h"
+#include "micro/framework/scratch_buffer.h"
+
+namespace micro {
+namespace ops {
+
+void PoolingRefOp::MaxPooling(const mifloat *input,
+                              const int32_t *filter_hw,
+                              const int32_t *stride_hw,
+                              const int32_t *dilation_hw,
+                              const int32_t *pad_hw) {
+  const int32_t batch = output_dims_[0];
+  const int32_t out_channels = output_dims_[3];
+  const int32_t out_height = output_dims_[1];
+  const int32_t out_width = output_dims_[2];
+  const int32_t in_channels = input_dims_[3];
+  const int32_t in_height = input_dims_[1];
+  const int32_t in_width = input_dims_[2];
+
+  float *max = ScratchBuffer(engine_config_).GetBuffer<float>(in_channels);
+  for (int32_t b = 0; b < batch; ++b) {
+    int32_t batch_base = b * out_height;
+    int32_t in_b_base = b * in_height;
+    for (int32_t h = 0; h < out_height; ++h) {
+      int32_t height_base = (batch_base + h) * out_width;
+      int32_t inh_addr = h * stride_hw[0] - pad_hw[0];
+      for (int32_t w = 0; w < out_width; ++w) {
+        int32_t width_base = (height_base + w) * out_channels;
+        int32_t inw_addr = w * stride_hw[1] - pad_hw[1];
+        for (int32_t c = 0; c < in_channels; ++c) {
+          max[c] = base::lowest();
+        }
+        for (int32_t fh = 0; fh < filter_hw[0]; ++fh) {
+          int32_t inh = inh_addr + dilation_hw[0] * fh;
+          if (inh < 0 || inh >= in_height) {
+            continue;
+          }
+          int32_t in_h_base = (in_b_base + inh) * in_width;
+          for (int32_t fw = 0; fw < filter_hw[1]; ++fw) {
+            int32_t inw = inw_addr + dilation_hw[1] * fw;
+            int32_t in_w_base = (in_h_base + inw) * in_channels;
+            for (int32_t c = 0; c < out_channels; ++c) {
+              if (inw >= 0 && inw < in_width) {
+                const int32_t input_offset = in_w_base + c;
+                float input_value = input[input_offset];
+                if (input_value > max[c]) {
+                  max[c] = input_value;
+                }
+              }
+            }
+          }
+        }
+        for (int i = 0; i < in_channels; ++i) {
+          output_[width_base + i] = max[i];
+        }
+      }
+    }
+  }
+}
+
+void PoolingRefOp::AvgPooling(const mifloat *input,
+                              const int32_t *filter_hw,
+                              const int32_t *stride_hw,
+                              const int32_t *dilation_hw,
+                              const int32_t *pad_hw) {
+  const int32_t batch = output_dims_[0];
+  const int32_t out_channels = output_dims_[3];
+  const int32_t out_height = output_dims_[1];
+  const int32_t out_width = output_dims_[2];
+  const int32_t in_channels =
input_dims_[3]; + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + + ScratchBuffer scratch_buffer(engine_config_); + float *total = scratch_buffer.GetBuffer(in_channels); + uint32_t *block_size = scratch_buffer.GetBuffer(in_channels); + for (int32_t b = 0; b < batch; ++b) { + int32_t batch_base = b * out_height; + int32_t in_b_base = b * in_height; + for (int32_t h = 0; h < out_height; ++h) { + int32_t height_base = (batch_base + h) * out_width; + int32_t inh_addr = h * stride_hw[0] - pad_hw[0]; + for (int32_t w = 0; w < out_width; ++w) { + int32_t width_base = (height_base + w) * out_channels; + int32_t inw_addr = w * stride_hw[1] - pad_hw[1]; + for (int32_t c = 0; c < out_channels; ++c) { + total[c] = 0; + block_size[c] = 0; + } + for (int32_t fh = 0; fh < filter_hw[0]; ++fh) { + int32_t inh = inh_addr + dilation_hw[0] * fh; + int32_t in_h_base = (in_b_base + inh) * in_width; + for (int32_t fw = 0; fw < filter_hw[1]; ++fw) { + int32_t inw = inw_addr + dilation_hw[1] * fw; + int32_t in_w_base = (in_h_base + inw) * in_channels; + for (int32_t c = 0; c < out_channels; ++c) { + if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) { + total[c] += input[in_w_base + c]; + ++block_size[c]; + } + } + } + } + for (int32_t c = 0; c < out_channels; ++c) { + output_[width_base + c] = total[c] / block_size[c]; + } + } + } + } +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/pooling_ref.h b/micro/ops/nhwc/pooling_ref.h new file mode 100644 index 00000000..50456cc6 --- /dev/null +++ b/micro/ops/nhwc/pooling_ref.h @@ -0,0 +1,37 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_POOLING_REF_H_ +#define MICRO_OPS_NHWC_POOLING_REF_H_ + +#include "micro/model/output_shape.h" +#include "micro/ops/nhwc/base/pooling_base.h" + +namespace micro { +namespace ops { + +class PoolingRefOp : public PoolingBase { + private: + void MaxPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); + void AvgPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_POOLING_REF_H_ diff --git a/micro/ops/nhwc/pooling_s4.cc b/micro/ops/nhwc/pooling_s4.cc new file mode 100644 index 00000000..7595d292 --- /dev/null +++ b/micro/ops/nhwc/pooling_s4.cc @@ -0,0 +1,225 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
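Note that AvgPooling above divides each output by block_size, the number of in-bounds taps actually accumulated for that channel, rather than by filter_h * filter_w, so windows that overlap the padding are averaged only over real input values. A standalone single-channel sketch of that divisor rule (illustration only, not part of the patch):

#include <cstdint>

// Average one pooling window over a single H x W channel plane the way the
// reference kernel does: out-of-range taps are skipped and excluded from the
// divisor.
float AvgPoolWindow(const float *in, int32_t in_height, int32_t in_width,
                    int32_t h_start, int32_t w_start,
                    int32_t filter_h, int32_t filter_w) {
  float total = 0.0f;
  int32_t block_size = 0;
  for (int32_t fh = 0; fh < filter_h; ++fh) {
    const int32_t inh = h_start + fh;
    for (int32_t fw = 0; fw < filter_w; ++fw) {
      const int32_t inw = w_start + fw;
      if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) {
        total += in[inh * in_width + inw];
        ++block_size;
      }
    }
  }
  return block_size > 0 ? total / block_size : 0.0f;  // all-padding window
}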
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/pooling_s4.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { +void PoolingS4Op::MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + const int32_t batch = output_dims_[0]; + const int32_t out_channels = output_dims_[3]; + const int32_t out_height = output_dims_[1]; + const int32_t out_width = output_dims_[2]; + const int32_t in_channels = input_dims_[3]; + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t filter_size = filter_hw[0] * filter_hw[1]; + const int32_t filter_size_end = filter_size - 4; + + float *max = ScratchBuffer(engine_config_).GetBuffer(in_channels); + for (int32_t b = 0; b < batch; ++b) { + int32_t batch_base = b * out_height; + int32_t in_b_base = b * in_height; + for (int32_t h = 0; h < out_height; ++h) { + int32_t height_base = (batch_base + h) * out_width; + int32_t inh_base = h * stride_hw[0] - pad_hw[0]; + for (int32_t w = 0; w < out_width; ++w) { + int32_t width_base = (height_base + w) * out_channels; + int32_t inw_base = w * stride_hw[1] - pad_hw[1]; + for (int32_t c = 0; c < in_channels; ++c) { + max[c] = base::lowest(); + } + + for (int32_t s = 0; s < filter_size; s += 4) { + if (s > filter_size_end) { + s = filter_size_end; + } + const int32_t s1 = s + 1; + const int32_t s2 = s1 + 1; + const int32_t s3 = s2 + 1; + int32_t fh0 = s / filter_hw[1]; + int32_t fh1 = s1 / filter_hw[1]; + int32_t fh2 = s2 / filter_hw[1]; + int32_t fh3 = s3 / filter_hw[1]; + int32_t fw0 = s % filter_hw[1]; + int32_t fw1 = s1 % filter_hw[1]; + int32_t fw2 = s2 % filter_hw[1]; + int32_t fw3 = s3 % filter_hw[1]; + int32_t inh0 = inh_base + dilation_hw[0] * fh0; + int32_t inh1 = inh_base + dilation_hw[0] * fh1; + int32_t inh2 = inh_base + dilation_hw[0] * fh2; + int32_t inh3 = inh_base + dilation_hw[0] * fh3; + int32_t inw0 = inw_base + dilation_hw[1] * fw0; + int32_t inw1 = inw_base + dilation_hw[1] * fw1; + int32_t inw2 = inw_base + dilation_hw[1] * fw2; + int32_t inw3 = inw_base + dilation_hw[1] * fw3; + bool valid[4] = { + inh0 >= 0 && inh0 < in_height && inw0 >= 0 && inw0 < in_width, + inh1 >= 0 && inh1 < in_height && inw1 >= 0 && inw1 < in_width, + inh2 >= 0 && inh2 < in_height && inw2 >= 0 && inw2 < in_width, + inh3 >= 0 && inh3 < in_height && inw3 >= 0 && inw3 < in_width + }; + int32_t in_w_base0 = + ((in_b_base + inh0) * in_width + inw0) * in_channels; + int32_t in_w_base1 = + ((in_b_base + inh1) * in_width + inw1) * in_channels; + int32_t in_w_base2 = + ((in_b_base + inh2) * in_width + inw2) * in_channels; + int32_t in_w_base3 = + ((in_b_base + inh3) * in_width + inw3) * in_channels; + for (int32_t c = 0; c < out_channels; ++c) { + if (valid[0]) { + const int32_t input_offset0 = in_w_base0 + c; + float input_value = input[input_offset0]; + if (input_value > max[c]) { + max[c] = input_value; + } + } + if (valid[1]) { + const int32_t input_offset1 = in_w_base1 + c; + float input_value = 
input[input_offset1]; + if (input_value > max[c]) { + max[c] = input_value; + } + } + if (valid[2]) { + const int32_t input_offset2 = in_w_base2 + c; + float input_value = input[input_offset2]; + if (input_value > max[c]) { + max[c] = input_value; + } + } + if (valid[3]) { + const int32_t input_offset3 = in_w_base3 + c; + float input_value = input[input_offset3]; + if (input_value > max[c]) { + max[c] = input_value; + } + } + } + } + for (int i = 0; i < in_channels; ++i) { + output_[width_base + i] = max[i]; + } + } + } + } +} + +void PoolingS4Op::AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + const int32_t batch = output_dims_[0]; + const int32_t out_channels = output_dims_[3]; + const int32_t out_height = output_dims_[1]; + const int32_t out_width = output_dims_[2]; + const int32_t in_channels = input_dims_[3]; + const int32_t in_height = input_dims_[1]; + const int32_t in_width = input_dims_[2]; + const int32_t filter_size = filter_hw[0] * filter_hw[1]; + const int32_t filter_size_end = filter_size - 4; + + ScratchBuffer scratch_buffer(engine_config_); + float *total = scratch_buffer.GetBuffer(in_channels); + uint32_t *block_size = scratch_buffer.GetBuffer(in_channels); + for (int32_t b = 0; b < batch; ++b) { + int32_t batch_base = b * out_height; + int32_t in_b_base = b * in_height; + for (int32_t h = 0; h < out_height; ++h) { + int32_t height_base = (batch_base + h) * out_width; + int32_t inh_base = h * stride_hw[0] - pad_hw[0]; + for (int32_t w = 0; w < out_width; ++w) { + int32_t width_base = (height_base + w) * out_channels; + int32_t inw_base = w * stride_hw[1] - pad_hw[1]; + for (int32_t c = 0; c < in_channels; ++c) { + total[c] = 0; + block_size[c] = 0; + } + + for (int32_t s = 0; s < filter_size; s += 4) { + if (s > filter_size_end) { + s = filter_size_end; + } + const int32_t s1 = s + 1; + const int32_t s2 = s1 + 1; + const int32_t s3 = s2 + 1; + int32_t fh0 = s / filter_hw[1]; + int32_t fh1 = s1 / filter_hw[1]; + int32_t fh2 = s2 / filter_hw[1]; + int32_t fh3 = s3 / filter_hw[1]; + int32_t fw0 = s % filter_hw[1]; + int32_t fw1 = s1 % filter_hw[1]; + int32_t fw2 = s2 % filter_hw[1]; + int32_t fw3 = s3 % filter_hw[1]; + int32_t inh0 = inh_base + dilation_hw[0] * fh0; + int32_t inh1 = inh_base + dilation_hw[0] * fh1; + int32_t inh2 = inh_base + dilation_hw[0] * fh2; + int32_t inh3 = inh_base + dilation_hw[0] * fh3; + int32_t inw0 = inw_base + dilation_hw[1] * fw0; + int32_t inw1 = inw_base + dilation_hw[1] * fw1; + int32_t inw2 = inw_base + dilation_hw[1] * fw2; + int32_t inw3 = inw_base + dilation_hw[1] * fw3; + bool valid[4] = { + inh0 >= 0 && inh0 < in_height && inw0 >= 0 && inw0 < in_width, + inh1 >= 0 && inh1 < in_height && inw1 >= 0 && inw1 < in_width, + inh2 >= 0 && inh2 < in_height && inw2 >= 0 && inw2 < in_width, + inh3 >= 0 && inh3 < in_height && inw3 >= 0 && inw3 < in_width + }; + int32_t in_w_base0 = + ((in_b_base + inh0) * in_width + inw0) * in_channels; + int32_t in_w_base1 = + ((in_b_base + inh1) * in_width + inw1) * in_channels; + int32_t in_w_base2 = + ((in_b_base + inh2) * in_width + inw2) * in_channels; + int32_t in_w_base3 = + ((in_b_base + inh3) * in_width + inw3) * in_channels; + int32_t block_num = valid[0] + valid[1] + valid[2] + valid[3]; + for (int32_t c = 0; c < out_channels; ++c) { + float total_c = 0; + if (valid[0]) { + total_c += input[in_w_base0 + c]; + } + if (valid[1]) { + total_c += input[in_w_base1 + c]; + } + if (valid[2]) { + total_c += 
input[in_w_base2 + c]; + } + if (valid[3]) { + total_c += input[in_w_base3 + c]; + } + total[c] += total_c; + block_size[c] += block_num; + } + } + for (int32_t c = 0; c < out_channels; ++c) { + output_[width_base + c] = total[c] / block_size[c]; + } + } + } + } +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/pooling_s4.h b/micro/ops/nhwc/pooling_s4.h new file mode 100644 index 00000000..a3d9416f --- /dev/null +++ b/micro/ops/nhwc/pooling_s4.h @@ -0,0 +1,37 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_POOLING_S4_H_ +#define MICRO_OPS_NHWC_POOLING_S4_H_ + +#include "micro/model/output_shape.h" +#include "micro/ops/nhwc/base/pooling_base.h" + +namespace micro { +namespace ops { + +class PoolingS4Op : public PoolingBase { + private: + void MaxPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); + void AvgPooling(const mifloat *input, const int32_t *filter_hw, + const int32_t *stride_hw, const int32_t *dilation_hw, + const int32_t *pad_hw); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_POOLING_S4_H_ diff --git a/micro/ops/reduce.cc b/micro/ops/reduce.cc new file mode 100644 index 00000000..6ecce84d --- /dev/null +++ b/micro/ops/reduce.cc @@ -0,0 +1,45 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/reduce.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus ReduceOpBase::OnInit() { + reduce_type_ = static_cast( + GetArgByName("reduce_type", static_cast(MEAN))); + axis_ = GetRepeatArgByName("axis", &axis_size_); + keep_dims_ = GetArgByName("keepdims", false); + + return MACE_SUCCESS; +} + +void ReduceOpBase::Validate() { +#ifndef NDEBUG + const int32_t input_dim_size = GetInputShapeDimSize(INPUT); + const int32_t left = input_dim_size * -1; + const int32_t right = input_dim_size; + if (axis_size_) { + for (uint32_t i = 0; i < axis_size_; ++i) { + MACE_ASSERT1(axis_[i] > left && axis_[i] < right, "Axis is over range."); + } + } +#endif +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/reduce.h b/micro/ops/reduce.h new file mode 100644 index 00000000..354b3392 --- /dev/null +++ b/micro/ops/reduce.h @@ -0,0 +1,575 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MICRO_OPS_REDUCE_H_
+#define MICRO_OPS_REDUCE_H_
+
+#include "micro/base/logging.h"
+#include "micro/base/types.h"
+#include "micro/base/utils.h"
+#include "micro/framework/operator.h"
+#include "micro/framework/scratch_buffer.h"
+
+namespace micro {
+namespace ops {
+
+class ReduceOpBase : public framework::Operator {
+ public:
+  MaceStatus OnInit();
+
+ public:
+  enum ReduceType {
+    MEAN = 0,
+    MIN = 1,
+    MAX = 2,
+    PROD = 3,
+    SUM = 4,
+  };
+
+ protected:
+  void Validate();
+
+ protected:
+  ReduceType reduce_type_;
+  const int32_t *axis_;
+  uint32_t axis_size_;
+  bool keep_dims_;
+
+  MACE_OP_INPUT_TAGS(INPUT);
+  MACE_OP_OUTPUT_TAGS(OUTPUT);
+};
+
+template<typename T>
+class ReduceOp : public ReduceOpBase {
+ public:
+  MaceStatus OnInit() {
+    input_ = GetInputData<T>(INPUT);
+    input_dims_ = GetInputShapeDims(INPUT);
+    input_dim_size_ = GetInputShapeDimSize(INPUT);
+
+    output_ = GetOutputData<T>(OUTPUT);
+    return ReduceOpBase::OnInit();
+  }
+
+  MaceStatus Run() {
+    Validate();
+    ScratchBuffer scratch_buffer(engine_config_);
+    bool *bitmap = scratch_buffer.GetBuffer<bool>(input_dim_size_);
+    int32_t *data_dims = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
+    uint32_t data_dim_size = 0;
+    int32_t *output_dims = scratch_buffer.GetBuffer<int32_t>(input_dim_size_);
+    uint32_t output_dim_size = 0;
+    Simplify(output_dims, &output_dim_size, bitmap,
+             input_dim_size_, data_dims, &data_dim_size);
+    MACE_RETURN_IF_ERROR(
+        ResizeOutputShape(OUTPUT, output_dim_size, output_dims));
+    const int32_t output_size =
+        base::GetShapeSize(output_dim_size, output_dims);
+    Compute(data_dims, data_dim_size, static_cast<uint32_t>(output_size));
+
+    return MACE_SUCCESS;
+  }
+
+ private:
+  void Simplify(int32_t *output_dims, uint32_t *output_dim_size,
+                bool *bitmap, int32_t bitmap_size,
+                int32_t *data_dims, uint32_t *data_dim_size) {
+    base::memset(bitmap, false, bitmap_size);
+    if (axis_size_ == 0) {
+      for (uint32_t i = 0; i < input_dim_size_; ++i) {
+        bitmap[i] = true;
+      }
+    } else {
+      for (uint32_t i = 0; i < axis_size_; ++i) {
+        int32_t index = axis_[i] >= 0 ?
axis_[i] : axis_[i] + input_dim_size_; + DataFormat data_format = static_cast(GetArgByName( + "data_format", static_cast(NHWC))); + if (data_format == NCHW && + DataTypeToEnum::value != DT_UINT8 && input_dim_size_ == 4) { + if (index == 1 || index == 2) { + index = index + 1; + } else if (index == 3) { + index = 1; + } + } + bitmap[index] = true; + } + } + uint32_t out_dim_idx = 0; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + if (!bitmap[i]) { + output_dims[out_dim_idx++] = input_dims_[i]; + } else if (keep_dims_) { + output_dims[out_dim_idx++] = 1; + } + } + *output_dim_size = out_dim_idx; + + int32_t data_dims_idx = 0; + uint32_t dim_index = 0; + for (; dim_index < input_dim_size_; ++dim_index) { + if (input_dims_[dim_index] != 1) break; + } + if (dim_index >= input_dim_size_) { + reduce_first_axis_ = true; + } else { + reduce_first_axis_ = bitmap[dim_index]; + data_dims[data_dims_idx++] = input_dims_[dim_index]; + ++dim_index; + for (; dim_index < input_dim_size_; ++dim_index) { + const int32_t n = input_dims_[dim_index]; + if (n == 1) { + bitmap[dim_index] = bitmap[dim_index - 1]; + } + if (bitmap[dim_index - 1] != bitmap[dim_index]) { + data_dims[data_dims_idx++] = n; + } else { + data_dims[data_dims_idx - 1] *= n; + } + } + } + *data_dim_size = data_dims_idx; + } + + void Reduce1Dims(ReduceType type, int32_t *data_reshape) { + if (reduce_first_axis_) { + if (type == MEAN) { + T tmp = 0; + for (int32_t i = 0; i < data_reshape[0]; ++i) { + tmp = tmp + input_[i]; + } + output_[0] = tmp / data_reshape[0]; + } else if (type == MIN) { + T tmp = input_[0]; + for (int32_t i = 1; i < data_reshape[0]; ++i) { + tmp = base::min(tmp, input_[i]); + } + output_[0] = tmp; + } else if (type == MAX) { + T tmp = input_[0]; + for (int32_t i = 1; i < data_reshape[0]; ++i) { + tmp = base::max(tmp, input_[i]); + } + output_[0] = tmp; + } else if (type == PROD) { + T tmp = input_[0]; + for (int32_t i = 1; i < data_reshape[0]; ++i) { + tmp = tmp * input_[i]; + } + output_[0] = tmp; + } else if (type == SUM) { + T tmp = 0; + for (int32_t i = 0; i < data_reshape[0]; ++i) { + tmp = tmp + input_[i]; + } + output_[0] = tmp; + } else { + MACE_NOT_IMPLEMENTED; + } + } else { + base::memcpy(output_, input_, data_reshape[0] * sizeof(T)); + } + } + + void Reduce2Dims(ReduceType type, int32_t *data_reshape) { + if (reduce_first_axis_) { + if (type == MEAN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = 0; + for (int32_t j = 0; j < data_reshape[0]; ++j) { + tmp += input_[j * data_reshape[1] + i]; + } + output_[i] = tmp / data_reshape[0]; + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = input_[i]; + for (int32_t j = 1; j < data_reshape[0]; ++j) { + tmp = base::min(tmp, input_[j * data_reshape[1] + i]); + } + output_[i] = tmp; + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = input_[i]; + for (int32_t j = 1; j < data_reshape[0]; ++j) { + tmp = base::max(tmp, input_[j * data_reshape[1] + i]); + } + output_[i] = tmp; + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = input_[i]; + for (int32_t j = 1; j < data_reshape[0]; ++j) { + tmp = tmp * input_[j * data_reshape[1] + i]; + } + output_[i] = tmp; + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = 0; + for (int32_t j = 0; j < data_reshape[0]; ++j) { + tmp += input_[j * data_reshape[1] + i]; + } + output_[i] = tmp; + } + } else { + MACE_NOT_IMPLEMENTED; + } + } else { + if (type == MEAN) 
{ + for (int32_t i = 0; i < data_reshape[0]; ++i) { + T tmp = 0; + for (int32_t j = 0; j < data_reshape[1]; ++j) { + tmp += input_[i * data_reshape[1] + j]; + } + output_[i] = tmp / data_reshape[1]; + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + T tmp = input_[i * data_reshape[1]]; + for (int32_t j = 1; j < data_reshape[1]; ++j) { + tmp = base::min(tmp, input_[i * data_reshape[1] + j]); + } + output_[i] = tmp; + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + T tmp = input_[i * data_reshape[1]]; + for (int32_t j = 1; j < data_reshape[1]; ++j) { + tmp = base::max(tmp, input_[i * data_reshape[1] + j]); + } + output_[i] = tmp; + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + T tmp = input_[i * data_reshape[1]]; + for (int32_t j = 1; j < data_reshape[1]; ++j) { + tmp = tmp * input_[i * data_reshape[1] + j]; + } + output_[i] = tmp; + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + T tmp = 0; + for (int32_t j = 0; j < data_reshape[1]; ++j) { + tmp += input_[i * data_reshape[1] + j]; + } + output_[i] = tmp; + } + } else { + MACE_NOT_IMPLEMENTED; + } + } + } + + void Reduce3Dims(ReduceType type, int32_t *data_reshape) { + if (reduce_first_axis_) { + if (type == MEAN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[0]; ++k) { + output_[i] += + input_[(k * data_reshape[1] + i) * data_reshape[2] + + j]; + } + } + output_[i] /= (data_reshape[0] * data_reshape[2]); + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = input_[i * data_reshape[2]]; + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[0]; ++k) { + tmp = base::min( + tmp, input_[(k * data_reshape[1] + i) * data_reshape[2] + j]); + } + } + output_[i] = tmp; + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = input_[i * data_reshape[2]]; + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[0]; ++k) { + tmp = base::max( + tmp, input_[(k * data_reshape[1] + i) * data_reshape[2] + j]); + } + } + output_[i] = tmp; + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + T tmp = 1; + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[0]; ++k) { + tmp *= input_[(k * data_reshape[1] + i) * data_reshape[2] + j]; + } + } + output_[i] = tmp; + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[0]; ++k) { + output_[i] += + input_[(k * data_reshape[1] + i) * data_reshape[2] + j]; + } + } + } + } else { + MACE_NOT_IMPLEMENTED; + } + } else { + if (type == MEAN) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[1]; ++k) { + output_[i * data_reshape[2] + j] += + input_[(i * data_reshape[1] + k) * data_reshape[2] + j]; + } + output_[i * data_reshape[2] + j] /= data_reshape[1]; + } + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = input_[i * data_reshape[1] * data_reshape[2] + j]; + for (int32_t k = 1; k < data_reshape[1]; ++k) { + tmp = base::min( + tmp, input_[(i * data_reshape[1] + k) * data_reshape[2] + j]); + } + 
output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = input_[i * data_reshape[1] * data_reshape[2] + j]; + for (int32_t k = 1; k < data_reshape[1]; ++k) { + tmp = base::max( + tmp, input_[(i * data_reshape[1] + k) * data_reshape[2] + j]); + } + output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = input_[i * data_reshape[1] * data_reshape[2] + j]; + for (int32_t k = 1; k < data_reshape[1]; ++k) { + tmp *= input_[(i * data_reshape[1] + k) * data_reshape[2] + j]; + } + output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[1]; ++k) { + output_[i * data_reshape[2] + j] += + input_[(i * data_reshape[1] + k) * data_reshape[2] + j]; + } + } + } + } else { + MACE_NOT_IMPLEMENTED; + } + } + } + + void Reduce4Dims(ReduceType type, int32_t *data_reshape) { + if (reduce_first_axis_) { + if (type == MEAN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[3]; ++j) { + for (int32_t k = 0; k < data_reshape[2]; ++k) { + for (int32_t t = 0; t < data_reshape[0]; ++t) { + output_[i * data_reshape[3] + j] += + input_[((t * data_reshape[1] + i) * + data_reshape[2] + k) * data_reshape[3] + j]; + } + } + output_[i * data_reshape[3] + j] /= + (data_reshape[0] * data_reshape[2]); + } + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[3]; ++j) { + T tmp = input_[i * data_reshape[2] * data_reshape[3] + j]; + for (int32_t k = 0; k < data_reshape[2]; ++k) { + for (int32_t t = 0; t < data_reshape[0]; ++t) { + tmp = base::min(tmp, + input_[((t * data_reshape[1] + i) * + data_reshape[2] + k) * data_reshape[3] + + j]); + } + } + output_[i * data_reshape[3] + j] = tmp; + } + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[3]; ++j) { + T tmp = input_[i * data_reshape[2] * data_reshape[3] + j]; + for (int32_t k = 0; k < data_reshape[2]; ++k) { + for (int32_t t = 0; t < data_reshape[0]; ++t) { + tmp = base::max(tmp, // NOLINT + input_[((t * data_reshape[1] + i) * + data_reshape[2] + k) * data_reshape[3] + + j]); + } + } + output_[i * data_reshape[3] + j] = tmp; + } + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[3]; ++j) { + T tmp = 1; + for (int32_t k = 0; k < data_reshape[2]; ++k) { + for (int32_t t = 0; t < data_reshape[0]; ++t) { + tmp = tmp * input_[((t * data_reshape[1] + i) * + data_reshape[2] + k) * data_reshape[3] + j]; + } + } + output_[i * data_reshape[3] + j] = tmp; + } + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[1]; ++i) { + for (int32_t j = 0; j < data_reshape[3]; ++j) { + for (int32_t k = 0; k < data_reshape[2]; ++k) { + for (int32_t t = 0; t < data_reshape[0]; ++t) { + output_[i * data_reshape[3] + j] += + input_[((t * data_reshape[1] + i) * + data_reshape[2] + k) * data_reshape[3] + j]; + } + } + } + } + } else { + MACE_NOT_IMPLEMENTED; + } + } else { + if (type == MEAN) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[1]; 
++k) { + for (int32_t t = 0; t < data_reshape[3]; ++t) { + output_[i * data_reshape[2] + j] += + input_[((i * data_reshape[1] + k) * + data_reshape[2] + j) * data_reshape[3] + t]; + } + } + output_[i * data_reshape[2] + j] /= + (data_reshape[1] * data_reshape[3]); + } + } + } else if (type == MIN) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = input_[(i * data_reshape[1] * + data_reshape[2] + j) * data_reshape[3]]; + for (int32_t k = 0; k < data_reshape[1]; ++k) { + for (int32_t t = 0; t < data_reshape[3]; ++t) { + tmp = base::min( + tmp, input_[((i * data_reshape[1] + k) * + data_reshape[2] + j) * data_reshape[3] + t]); + } + } + output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == MAX) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = input_[(i * data_reshape[1] * + data_reshape[2] + j) * data_reshape[3]]; + for (int32_t k = 0; k < data_reshape[1]; ++k) { + for (int32_t t = 0; t < data_reshape[3]; ++t) { + tmp = base::max( + tmp, input_[((i * data_reshape[1] + k) * + data_reshape[2] + j) * data_reshape[3] + t]); + } + } + output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == PROD) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + T tmp = 1; + for (int32_t k = 0; k < data_reshape[1]; ++k) { + for (int32_t t = 0; t < data_reshape[3]; ++t) { + tmp = tmp * input_[((i * data_reshape[1] + k) * + data_reshape[2] + j) * data_reshape[3] + t]; + } + } + output_[i * data_reshape[2] + j] = tmp; + } + } + } else if (type == SUM) { + for (int32_t i = 0; i < data_reshape[0]; ++i) { + for (int32_t j = 0; j < data_reshape[2]; ++j) { + for (int32_t k = 0; k < data_reshape[1]; ++k) { + for (int32_t t = 0; t < data_reshape[3]; ++t) { + output_[i * data_reshape[2] + j] += + input_[((i * data_reshape[1] + k) * + data_reshape[2] + j) * data_reshape[3] + t]; + } + } + } + } + } else { + MACE_NOT_IMPLEMENTED; + } + } + } + + void Compute(int32_t *data_reshape, + uint32_t data_reshape_size, uint32_t output_size) { + base::memset(output_, static_cast(0), output_size); + switch (data_reshape_size) { + case 1:Reduce1Dims(reduce_type_, data_reshape); + break; + case 2:Reduce2Dims(reduce_type_, data_reshape); + break; + case 3:Reduce3Dims(reduce_type_, data_reshape); + break; + case 4:Reduce4Dims(reduce_type_, data_reshape); + break; + default:LOG(FATAL) << "not implemented in mace" + << "data reshape size" << data_reshape_size + << "reduce first axis:" << reduce_first_axis_; + break; + } + } + + private: + const T *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + T *output_; + + bool reduce_first_axis_; +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_REDUCE_H_ diff --git a/micro/ops/reshape.cc b/micro/ops/reshape.cc new file mode 100644 index 00000000..5b9c05d5 --- /dev/null +++ b/micro/ops/reshape.cc @@ -0,0 +1,108 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
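
The reduce kernels above only ever handle up to four loop levels because the input is first simplified: adjacent axes that share the same reduce/keep decision are merged into a single `data_reshape` entry, and size-1 axes inherit the decision of their left neighbour. The following standalone sketch (plain C++, illustrative names, independent of the MACE Micro headers) mirrors that merging step on a concrete shape.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Merge adjacent dimensions that share the same "reduce this axis" flag,
// mirroring the simplification that feeds Reduce1Dims..Reduce4Dims.
static void SimplifyReduce(const std::vector<int32_t> &dims,
                           std::vector<bool> reduce_axis,  // one flag per dim
                           std::vector<int32_t> *data_reshape,
                           bool *reduce_first_axis) {
  std::size_t d = 0;
  while (d < dims.size() && dims[d] == 1) ++d;  // skip leading size-1 dims
  if (d >= dims.size()) {
    *reduce_first_axis = true;  // every dimension is size 1
    return;
  }
  *reduce_first_axis = reduce_axis[d];
  data_reshape->push_back(dims[d]);
  for (++d; d < dims.size(); ++d) {
    if (dims[d] == 1) reduce_axis[d] = reduce_axis[d - 1];  // size-1 is neutral
    if (reduce_axis[d] != reduce_axis[d - 1]) {
      data_reshape->push_back(dims[d]);  // flag flips: start a new group
    } else {
      data_reshape->back() *= dims[d];   // same flag: fold into the group
    }
  }
}

int main() {
  // NHWC 1x8x8x16 with axes 1 and 2 reduced: H and W fold into one group of
  // 64, so only the 2-D kernel (first merged axis reduced) is required.
  std::vector<int32_t> reshape;
  bool first = false;
  SimplifyReduce({1, 8, 8, 16}, {false, true, true, false}, &reshape, &first);
  for (int32_t v : reshape) std::printf("%d ", v);       // prints: 64 16
  std::printf("reduce_first_axis=%d\n", first ? 1 : 0);  // prints: 1
  return 0;
}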
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/reshape.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +namespace { + +MaceStatus ValidShapeData(const int32_t *input_dims, + const uint32_t input_dim_size, + int32_t *shape_data, + const uint32_t shape_data_size) { + MACE_ASSERT( + input_dims != NULL && shape_data != NULL); + int32_t unknown_idx = -1; + int32_t product = 1; + const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + + for (uint32_t i = 0; i < shape_data_size; ++i) { + if (shape_data[i] == -1) { + MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1"); + unknown_idx = i; + shape_data[i] = 1; + } else { + MACE_ASSERT2(shape_data[i] >= 0, "Shape must be non-negative: ", + shape_data[i]); + if (shape_data[i] == 0) { + MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range."); + shape_data[i] = input_dims[i]; + } + product *= shape_data[i]; + } + } + + if (unknown_idx != -1) { + MACE_ASSERT1(product != 0, + "Cannot infer shape if there is zero shape size."); + const int32_t missing = input_size / product; + MACE_ASSERT1(missing * product == input_size, + "Input size not match reshaped tensor size"); + shape_data[unknown_idx] = missing; + } + + return MACE_SUCCESS; +} + +} // namespace + +MaceStatus ReshapeOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + shape_ = GetInputData(SHAPE); + shape_dims_ = GetInputShapeDims(SHAPE); + shape_dim_size_ = GetInputShapeDimSize(SHAPE); + + output_ = GetOutputData(OUTPUT); + return MACE_SUCCESS; +} + +MaceStatus ReshapeOp::Run() { + const int32_t input_data_size = + base::GetShapeSize(input_dim_size_, input_dims_); + const int32_t shape_data_size = + base::GetShapeSize(shape_dim_size_, shape_dims_); + + int32_t *shape_data = + ScratchBuffer(engine_config_).GetBuffer(shape_data_size); + base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t)); + + MACE_RETURN_IF_ERROR(ValidShapeData(input_dims_, input_dim_size_, + shape_data, shape_data_size)); + +#ifndef NDEBUG + const int32_t output_data_size = base::accumulate_multi( + shape_data, 0, static_cast(shape_data_size)); + if (input_data_size != output_data_size) { + LOG(FATAL) << "input_data_size(" << input_data_size + << ") != output_data_size(" << output_data_size + << "), please check the model."; + } +#endif + + // TODO(luxuhui): optimize this method by reusing buffer + base::memcpy(output_, input_, input_data_size * sizeof(mifloat)); + return ResizeOutputShape(OUTPUT, shape_data_size, shape_data); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/reshape.h b/micro/ops/reshape.h new file mode 100644 index 00000000..0e907b2f --- /dev/null +++ b/micro/ops/reshape.h @@ -0,0 +1,45 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
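
`ValidShapeData` above resolves the two placeholders a requested shape may contain: a `0` copies the input dimension at the same index, and a single `-1` is inferred so that the element count matches the input. A standalone sketch of the same rule (plain C++, illustrative names, not the MACE Micro API):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Resolve 0 ("copy the input dim") and at most one -1 ("infer from the rest"),
// following the same rules as ValidShapeData above.
static std::vector<int32_t> ResolveShape(const std::vector<int32_t> &in_dims,
                                         std::vector<int32_t> shape) {
  int64_t input_size = 1;
  for (int32_t d : in_dims) input_size *= d;
  int64_t product = 1;
  int unknown = -1;
  for (std::size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      assert(unknown == -1 && "only one -1 is allowed");
      unknown = static_cast<int>(i);
    } else {
      if (shape[i] == 0) shape[i] = in_dims[i];  // 0 copies the input dim
      product *= shape[i];
    }
  }
  if (unknown >= 0) {
    assert(product != 0 && input_size % product == 0);
    shape[unknown] = static_cast<int32_t>(input_size / product);
  }
  return shape;
}

int main() {
  // Input 2x3x4 (24 elements), requested shape {0, -1} -> {2, 12}.
  std::vector<int32_t> out = ResolveShape({2, 3, 4}, {0, -1});
  std::printf("%d x %d\n", out[0], out[1]);
  return 0;
}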
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_RESHAPE_H_ +#define MICRO_OPS_RESHAPE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class ReshapeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + const int32_t *shape_; + const int32_t *shape_dims_; + uint32_t shape_dim_size_; + + mifloat *output_; + + MACE_OP_INPUT_TAGS(INPUT, SHAPE); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_RESHAPE_H_ diff --git a/micro/ops/shape.cc b/micro/ops/shape.cc new file mode 100644 index 00000000..fadee5c3 --- /dev/null +++ b/micro/ops/shape.cc @@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/shape.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { +namespace ops { + +MaceStatus ShapeOp::OnInit() { + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + output_ = GetOutputData(OUTPUT); + + return MACE_SUCCESS; +} + +MaceStatus ShapeOp::Run() { + if (input_dim_size_ > 0) { + const int32_t out_put_dims[1] = {static_cast(input_dim_size_)}; + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, 1, out_put_dims)); + } else { + ResizeOutputShape(OUTPUT, 0, NULL); + } + + for (uint32_t i = 0; i < input_dim_size_; ++i) { + output_[i] = static_cast(input_dims_[i]); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/shape.h b/micro/ops/shape.h new file mode 100644 index 00000000..047516d4 --- /dev/null +++ b/micro/ops/shape.h @@ -0,0 +1,39 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_SHAPE_H_ +#define MICRO_OPS_SHAPE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class ShapeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const int32_t *input_dims_; + uint32_t input_dim_size_; + + int32_t *output_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_SHAPE_H_ diff --git a/micro/ops/softmax.cc b/micro/ops/softmax.cc new file mode 100644 index 00000000..26a91f90 --- /dev/null +++ b/micro/ops/softmax.cc @@ -0,0 +1,91 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/softmax.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { +namespace ops { + +MaceStatus SoftmaxOp::OnInit() { + data_format_ = static_cast(GetArgByName( + "data_format", static_cast(NHWC))); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT1(input_dim_size_ >= 2, "The input->dim_size() >= 2 failed."); + + output_ = GetOutputData(OUTPUT); + use_log_ = GetArgByName("use_log", false); + + return MACE_SUCCESS; +} + +MaceStatus SoftmaxOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + if (NHWC == data_format_) { // NHWC + return RunForNHWC(); + } else { + MACE_NOT_IMPLEMENTED; + return MACE_UNSUPPORTED; + } +} + +MaceStatus SoftmaxOp::RunForNHWC() { + int32_t class_size = input_dims_[input_dim_size_ - 1]; + int32_t hw_stride = class_size; + int32_t hw_size = base::accumulate_multi(input_dims_, 1, input_dim_size_); + int32_t batch_stride = hw_size; + int32_t batch_size = base::GetShapeSize(input_dim_size_, input_dims_); + + float std_lowest = base::lowest(); + for (int32_t b_offset = 0; b_offset < batch_size; b_offset += batch_stride) { + const mifloat *input_b_ptr = input_ + b_offset; + mifloat *output_b_ptr = output_ + b_offset; + for (int32_t k = 0; k < hw_size; k += hw_stride) { + const mifloat *input_ptr = input_b_ptr + k; + mifloat *output_ptr = output_b_ptr + k; + + float max_val = std_lowest; + for (int32_t c = 0; c < class_size; ++c) { + max_val = base::max(max_val, input_ptr[c]); // NOLINT + } + + float sum = 0; + for (int32_t c = 0; c < class_size; ++c) { + float exp_value = base::exp(input_ptr[c] - max_val); + sum += exp_value; + output_ptr[c] = exp_value; + } + + if (use_log_) { + for (int32_t c = 0; c < class_size; ++c) { + float output_value = output_ptr[c]; + output_value /= sum; + output_ptr[c] = base::log(output_value); + } + } else { + for (int32_t c = 0; c < class_size; ++c) { + output_ptr[c] = output_ptr[c] / sum; + } + } + } // k + } // b_offset + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/softmax.h b/micro/ops/softmax.h new file mode 100644 index 00000000..38e77620 --- /dev/null +++ b/micro/ops/softmax.h 
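
`SoftmaxOp::RunForNHWC` above subtracts the per-row maximum before calling `exp`, which keeps the exponentials in a safe range, then divides by the accumulated sum (taking the logarithm instead when `use_log` is set). A minimal standalone version of that per-row computation (plain C++, not the operator API):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Softmax over one row of n class scores; set log_form for log-softmax.
// Subtracting the row maximum first avoids overflow in exp(), as in RunForNHWC.
static void SoftmaxRow(const float *in, int n, bool log_form, float *out) {
  float max_val = in[0];
  for (int c = 1; c < n; ++c) max_val = std::max(max_val, in[c]);
  float sum = 0.0f;
  for (int c = 0; c < n; ++c) {
    out[c] = std::exp(in[c] - max_val);
    sum += out[c];
  }
  for (int c = 0; c < n; ++c) {
    out[c] = log_form ? std::log(out[c] / sum) : out[c] / sum;
  }
}

int main() {
  const float logits[3] = {1.0f, 2.0f, 3.0f};
  float probs[3];
  SoftmaxRow(logits, 3, /*log_form=*/false, probs);
  std::printf("%.3f %.3f %.3f\n", probs[0], probs[1], probs[2]);  // ~0.090 0.245 0.665
  return 0;
}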
@@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_SOFTMAX_H_ +#define MICRO_OPS_SOFTMAX_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class SoftmaxOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + MaceStatus RunForNHWC(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + bool use_log_; + DataFormat data_format_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_SOFTMAX_H_ diff --git a/micro/ops/squeeze.cc b/micro/ops/squeeze.cc new file mode 100644 index 00000000..11d64693 --- /dev/null +++ b/micro/ops/squeeze.cc @@ -0,0 +1,77 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/squeeze.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +MaceStatus SqueezeOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT1(input_dim_size_ >= 2, "The input->dim_size() >= 2 failed."); + + output_ = GetOutputData(OUTPUT); + + const int32_t *axis = GetRepeatArgByName("axis", &axis_size_); + data_format_ = static_cast(GetArgByName( + "data_format", static_cast(NHWC))); + ScratchBuffer scratch_buffer(engine_config_); + if (data_format_ == NCHW && input_dim_size_ == 4 + && axis_size_ == 2 && axis[0] == 1 && axis[1] == 2) { + axis_ = scratch_buffer.GetBuffer(axis_size_); + base::memcpy(axis_, axis, axis_size_ * sizeof(int32_t)); + axis_[0] = 2; + axis_[1] = 3; + } else { + axis_ = const_cast(axis); + } + resize_shape_ = scratch_buffer.GetBuffer(input_dim_size_); + + return MACE_SUCCESS; +} + +MaceStatus SqueezeOp::Run() { + int32_t resize_shape_idx = 0; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + if (input_dims_[i] > 1) { + resize_shape_[resize_shape_idx++] = input_dims_[i]; + } else if (axis_size_ > 0) { + bool exist_in_axis = false; + for (uint32_t k = 0; k < axis_size_; ++k) { + if (i == static_cast(axis_[k])) { + exist_in_axis = true; + break; + } + } + if (!exist_in_axis) { + resize_shape_[resize_shape_idx++] = input_dims_[i]; + } + } + } + + // TODO(luxuhui): optimize this method by reusing buffer + const int32_t input_size = base::GetShapeSize(input_dim_size_, input_dims_); + base::memcpy(output_, input_, input_size * sizeof(mifloat)); + + return ResizeOutputShape(OUTPUT, resize_shape_idx, resize_shape_); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/squeeze.h b/micro/ops/squeeze.h new file mode 100644 index 00000000..6a62b3aa --- /dev/null +++ b/micro/ops/squeeze.h @@ -0,0 +1,46 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_SQUEEZE_H_ +#define MICRO_OPS_SQUEEZE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class SqueezeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + int32_t *axis_; + uint32_t axis_size_; + int32_t *resize_shape_; + DataFormat data_format_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_SQUEEZE_H_ diff --git a/micro/ops/stack.h b/micro/ops/stack.h new file mode 100644 index 00000000..0efcf8ff --- /dev/null +++ b/micro/ops/stack.h @@ -0,0 +1,94 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
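
`SqueezeOp` above keeps every dimension larger than one and drops a size-1 dimension only when the `axis` argument is empty or explicitly lists it; for 4-D NCHW inputs an `axis` of `{1, 2}` is remapped to `{2, 3}` first. The shape rule on its own, as a standalone sketch (plain C++, illustrative names):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Drop size-1 dims; if `axis` is non-empty, only the listed dims may be
// dropped. Mirrors the output-shape rule of SqueezeOp::Run above.
static std::vector<int32_t> SqueezeShape(const std::vector<int32_t> &dims,
                                         const std::vector<int32_t> &axis) {
  std::vector<int32_t> out;
  for (std::size_t i = 0; i < dims.size(); ++i) {
    const bool listed =
        std::find(axis.begin(), axis.end(), static_cast<int32_t>(i)) !=
        axis.end();
    if (dims[i] > 1 || (!axis.empty() && !listed)) {
      out.push_back(dims[i]);
    }
  }
  return out;
}

int main() {
  // {1, 7, 1, 3} with axis {2}: only dim 2 is squeezed -> {1, 7, 3}.
  std::vector<int32_t> s = SqueezeShape({1, 7, 1, 3}, {2});
  for (int32_t d : s) std::printf("%d ", d);  // prints: 1 7 3
  std::printf("\n");
  return 0;
}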
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_STACK_H_ +#define MICRO_OPS_STACK_H_ + +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" + +namespace micro { +namespace ops { + +template +class StackOp : public framework::Operator { + public: + MaceStatus OnInit() { + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData(OUTPUT); + axis_ = GetArgByName("axis", static_cast(0)); + + const int32_t output_dim_size = static_cast(input_dim_size_) + 1; + MACE_ASSERT1(axis_ >= -output_dim_size && axis_ < output_dim_size, + "axis out of bound."); + if (axis_ < 0) { + axis_ += output_dim_size; + } + + return MACE_SUCCESS; + } + + MaceStatus Run() { + const uint32_t inputs_size = GetInputSize(); + MACE_ASSERT1(inputs_size > 0, "stack inputs are empty."); + + int32_t output_dim_size = static_cast(input_dim_size_) + 1; + int32_t *output_dims = + ScratchBuffer(engine_config_).GetBuffer(output_dim_size); + for (int32_t i = 0; i < output_dim_size; ++i) { + if (i < axis_) { + output_dims[i] = input_dims_[i]; + } else if (i == axis_) { + output_dims[i] = inputs_size; + } else { + output_dims[i] = input_dims_[i - 1]; + } + } + ResizeOutputShape(OUTPUT, output_dim_size, output_dims); + + int32_t high_dim_elem_size = base::accumulate_multi(input_dims_, 0, axis_); + int32_t low_dim_elem_size = + base::accumulate_multi(input_dims_, axis_, input_dim_size_); + T *output_data = output_; + for (int32_t h = 0; h < high_dim_elem_size; ++h) { + for (uint32_t i = 0; i < inputs_size; ++i) { + const T *input_data = GetInputData(i); + base::memcpy(output_data, input_data + h * low_dim_elem_size, + sizeof(T) * low_dim_elem_size); + output_data += low_dim_elem_size; + } + } + + return MACE_SUCCESS; + } + + private: + const int32_t *input_dims_; + uint32_t input_dim_size_; + + T *output_; + + int32_t axis_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_STACK_H_ diff --git a/micro/ops/strided_slice.h b/micro/ops/strided_slice.h new file mode 100644 index 00000000..39a64ca2 --- /dev/null +++ b/micro/ops/strided_slice.h @@ -0,0 +1,293 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
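
`StackOp` above creates a new axis by interleaving its inputs: the product of the dimensions before the stack axis becomes an outer loop (`high_dim_elem_size`), and everything from the axis onward is copied as one contiguous block (`low_dim_elem_size`) per input. A standalone sketch of that copy pattern (plain C++, illustrative names):

#include <cstdio>
#include <vector>

// Stack `inputs` (all of shape `dims`) along `axis` of the output.
// outer = product of dims before axis, inner = product of dims from axis on.
static std::vector<float> StackAlongAxis(
    const std::vector<std::vector<float>> &inputs,
    const std::vector<int> &dims, int axis) {
  int outer = 1, inner = 1;
  for (int i = 0; i < axis; ++i) outer *= dims[i];
  for (int i = axis; i < static_cast<int>(dims.size()); ++i) inner *= dims[i];
  std::vector<float> out;
  out.reserve(outer * inputs.size() * inner);
  for (int h = 0; h < outer; ++h) {
    for (const std::vector<float> &in : inputs) {  // the new axis runs here
      out.insert(out.end(), in.begin() + h * inner,
                 in.begin() + (h + 1) * inner);
    }
  }
  return out;
}

int main() {
  // Two 2x2 inputs stacked on axis 1 -> output shape 2x2x2.
  std::vector<float> a = {1, 2, 3, 4}, b = {5, 6, 7, 8};
  std::vector<float> out = StackAlongAxis({a, b}, {2, 2}, /*axis=*/1);
  for (float v : out) std::printf("%g ", v);  // prints: 1 2 5 6 3 4 7 8
  std::printf("\n");
  return 0;
}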
+ +#ifndef MICRO_OPS_STRIDED_SLICE_H_ +#define MICRO_OPS_STRIDED_SLICE_H_ + +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/include/utils/macros.h" + +namespace micro { +namespace ops { +template +class StridedSliceOp : public framework::Operator { + public: + MaceStatus OnInit() { + MACE_RETURN_IF_ERROR(InitPrams()); + + return MACE_SUCCESS; + } + + MaceStatus Run() { + MACE_RETURN_IF_ERROR(AdjustPrams()); + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, output_shape_idx_, output_shape_)); + + bool slice_by_first_axis = true; + if (strides_[0] != 1) { + slice_by_first_axis = false; + } else { + for (uint32_t d = 1; d < input_dim_size_; ++d) { + if (strides_[d] != 1 || begin_[d] != 0 || + end_[d] != input_dims_[d]) { + slice_by_first_axis = false; + break; + } + } + } + + if (slice_by_first_axis) { + base::memset(dim_stride_, static_cast(1), input_dim_size_); + for (int32_t d = input_dim_size_ - 2; d >= 0; --d) { + dim_stride_[d] = dim_stride_[d + 1] * input_dims_[d + 1]; + } + base::memcpy(output_, input_ + begin_[0] * dim_stride_[0], + sizeof(T) * (end_[0] - begin_[0]) * dim_stride_[0]); + } else { + if (input_dim_size_ == 1) { + for (int32_t i = begin_[0]; + strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) { + *output_++ = input_[i]; + } + } else if (input_dim_size_ == 2) { + for (int32_t i = begin_[0]; + strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) { + for (int32_t j = begin_[1]; + strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) { + *output_++ = input_[i * input_dims_[1] + j]; + } + } + } else if (input_dim_size_ == 3) { + for (int32_t i = begin_[0]; + strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) { + for (int32_t j = begin_[1]; + strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) { + for (int32_t k = begin_[2]; + strides_[2] > 0 ? k < end_[2] : k > end_[2]; + k += strides_[2]) { + *output_++ = + input_[(i * input_dims_[1] + j) * input_dims_[2] + k]; + } + } + } + } else if (input_dim_size_ == 4) { + for (int32_t i = begin_[0]; + strides_[0] > 0 ? i < end_[0] : i > end_[0]; i += strides_[0]) { + for (int32_t j = begin_[1]; + strides_[1] > 0 ? j < end_[1] : j > end_[1]; j += strides_[1]) { + for (int32_t k = begin_[2]; + strides_[2] > 0 ? k < end_[2] : k > end_[2]; + k += strides_[2]) { + for (int32_t l = begin_[3]; + strides_[3] > 0 ? 
l < end_[3] : l > end_[3]; + l += strides_[3]) { + int32_t input_base = + (i * input_dims_[1] + j) * input_dims_[2] + k; + int32_t input_idx = input_base * input_dims_[3] + l; + *output_++ = input_[input_idx]; + } + } + } + } + } else { + MACE_NOT_IMPLEMENTED; + } + } + return MACE_SUCCESS; + } + + private: + MaceStatus InitPrams() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT1(input_dim_size_ > 0 && input_dim_size_ <= 4, + "The input dims should be an integer in (0, 4]."); + + ScratchBuffer scratch_buffer(engine_config_); + begin_ = scratch_buffer.GetBuffer(input_dim_size_); + end_ = scratch_buffer.GetBuffer(input_dim_size_); + strides_ = scratch_buffer.GetBuffer(input_dim_size_); + output_shape_ = scratch_buffer.GetBuffer(input_dim_size_); + dim_stride_ = scratch_buffer.GetBuffer(input_dim_size_); + base::memset(begin_, static_cast(0), input_dim_size_); + base::memset(end_, static_cast(0), input_dim_size_); + base::memset(strides_, static_cast(1), input_dim_size_); + + begin_dims_ = GetInputShapeDims(BEGIN); + end_dims_ = GetInputShapeDims(END); + + MACE_ASSERT1( + GetInputShapeDimSize(BEGIN) == 1 && GetInputShapeDimSize(END) == 1, + "Expected begin, end, and to be 1D tensor"); + + output_ = GetOutputData(OUTPUT); + + begin_mask_ = GetArgByName("begin_mask", static_cast(0)); + end_mask_ = GetArgByName("end_mask", static_cast(0)); + ellipsis_mask_ = GetArgByName("ellipsis_mask", static_cast(0)); + new_axis_mask_ = GetArgByName("new_axis_mask", static_cast(0)); + shrink_axis_mask_ = + GetArgByName("shrink_axis_mask", static_cast(0)); + is_slice_ = GetArgByName("slice", false); + MACE_ASSERT1(ellipsis_mask_ == 0 && new_axis_mask_ == 0, + "ellipsis_mask and new_axis_mask are not supported yet."); + + return MACE_SUCCESS; + } + + int32_t FormatIndices(const int32_t (&valid_range)[2], + const int32_t dim_len, int32_t indice) { + int32_t forward = indice < 0 ? 
indice + dim_len : indice; + return base::clamp(forward, valid_range[0], valid_range[1]); + } + + MaceStatus AdjustPrams() { + const int32_t *begin = GetInputData(BEGIN); + base::memcpy(begin_, begin, begin_dims_[0] * sizeof(int32_t)); + const int32_t *end = GetInputData(END); + base::memcpy(end_, end, end_dims_[0] * sizeof(int32_t)); + + const int32_t *strides = NULL; + if (GetInputSize() > 3) { + strides = GetInputData(STRIDES); + strides_dims_ = GetInputShapeDims(STRIDES); + } + if (strides == NULL) { + base::memset(strides_, static_cast(1), input_dim_size_); + strides_dims_ = begin_dims_; + } else { + base::memcpy(strides_, strides, strides_dims_[0] * sizeof(int32_t)); + } + + output_shape_idx_ = 0; + const uint32_t begin_size = static_cast(begin_dims_[0]); + MACE_UNUSED(begin_size); + const uint32_t end_size = static_cast(end_dims_[0]); + if (is_slice_) { + MACE_ASSERT1(begin_size == input_dim_size_ && end_size == input_dim_size_, + "In slice, begin and size elements num should be equal"); + for (uint32_t i = 0; i < input_dim_size_; ++i) { + if (end_[i] == -1) { + end_[i] = input_dims_[i] - begin_[i]; + } + } + for (uint32_t i = 0; i < input_dim_size_; ++i) { + int32_t b = begin_[i]; + int32_t s = end_[i]; +#ifndef NDEBUG + int32_t input_i = input_dims_[i]; + if (!(0 <= b && b <= input_i)) { + LOG(FATAL) << "In Slice, expected begin[" << i << "] in [0, " + << input_i << "], but got " << b; + } + if (!(0 <= s && b + s <= input_i)) { + LOG(FATAL) << "In Slice, expected size[" << i << "] in [0, " + << input_i - b << "], but got" << s; + } +#endif + end_[i] = b + s; + output_shape_[output_shape_idx_++] = s; + } + } else { + const uint32_t strides_size = static_cast(strides_dims_[0]); + MACE_ASSERT2(begin_size == end_size && end_size == strides_size, + "In strided_slice, expected begin, end, and strides to be", + " equal size tensors"); + for (uint32_t i = 0; i < strides_size; ++i) { + MACE_ASSERT1(strides_[i] != 0, "strides data cannot be 0!"); + } + + // pad + for (uint32_t i = end_size; i < input_dim_size_; ++i) { + end_[i] = input_dims_[i]; + } + + // mask and shrink + for (uint32_t d = 0; d < input_dim_size_; ++d) { + int32_t dim_len = input_dims_[d]; + const int32_t valid_range[] = {strides_[d] > 0 ? 0 : -1, + strides_[d] > 0 ? dim_len : dim_len - 1}; + if (!(shrink_axis_mask_ & (1 << d))) { + if (begin_mask_ & (1 << d)) { + begin_[d] = strides_[d] > 0 ? 0 : dim_len - 1; + } else { + begin_[d] = FormatIndices(valid_range, dim_len, begin_[d]); + } + if (end_mask_ & (1 << d)) { + end_[d] = strides_[d] > 0 ? dim_len : -1; + } else { + end_[d] = FormatIndices(valid_range, dim_len, end_[d]); + } + + int32_t out_dim_len = base::max( + static_cast(0), base::ceil((end_[d] - begin_[d]) / + static_cast(strides_[d]))); + output_shape_[output_shape_idx_++] = out_dim_len; + } else { + begin_[d] = begin_[d] < 0 ? 
begin_[d] + dim_len : begin_[d]; + end_[d] = begin_[d] + 1; +#ifndef NDEBUG + if (!(begin_[d] >= 0 && begin_[d] < dim_len)) { + LOG(FATAL) << "slice begin indice of dimension '" << d << "': " + << begin_[d] << ", is out of bound"; + } +#endif + } + } + } +#ifndef NDEBUG + for (uint32_t i = 0; i < output_shape_idx_; ++i) { + if (output_shape_[i] <= 0) { + LOG(FATAL) << "Expected output_shape[" << i + << "] larger than 0, but got " << output_shape_[i]; + } + } +#endif + return MACE_SUCCESS; + } + + private: + const T *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + int32_t *begin_; + const int32_t *begin_dims_; + int32_t *end_; + const int32_t *end_dims_; + int32_t *strides_; + const int32_t *strides_dims_; + + T *output_; + int32_t *output_shape_; + uint32_t output_shape_idx_; + int32_t *dim_stride_; + + int32_t begin_mask_; + int32_t end_mask_; + int32_t ellipsis_mask_; + int32_t new_axis_mask_; + int32_t shrink_axis_mask_; + bool is_slice_; + + MACE_OP_INPUT_TAGS(INPUT, BEGIN, END, STRIDES); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_STRIDED_SLICE_H_ diff --git a/micro/ops/utils/activation.cc b/micro/ops/utils/activation.cc new file mode 100644 index 00000000..29f34d2f --- /dev/null +++ b/micro/ops/utils/activation.cc @@ -0,0 +1,125 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
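
`AdjustPrams` above normalises negative indices, applies `begin_mask`/`end_mask`, and sizes each output dimension as `max(0, ceil((end - begin) / stride))`; `shrink_axis_mask` instead pins the dimension to a single index and removes it from the output shape. The one-dimensional bookkeeping looks roughly like this standalone sketch (plain C++, not the operator API):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Output extent of one strided-slice dimension after index normalisation and
// clamping, matching the formula used in AdjustPrams above.
static int SliceExtent(int dim_len, int begin, int end, int stride) {
  const int lo = stride > 0 ? 0 : -1;            // valid clamp range depends on
  const int hi = stride > 0 ? dim_len : dim_len - 1;  // the stride direction
  if (begin < 0) begin += dim_len;               // negative indices count back
  if (end < 0) end += dim_len;
  begin = std::min(std::max(begin, lo), hi);
  end = std::min(std::max(end, lo), hi);
  return std::max(0, static_cast<int>(std::ceil(
                         (end - begin) / static_cast<double>(stride))));
}

int main() {
  std::printf("%d\n", SliceExtent(10, 2, 8, 2));     // elements 2,4,6 -> 3
  std::printf("%d\n", SliceExtent(10, -1, -11, -1)); // reversed full dim -> 10
  return 0;
}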
+ +#include "micro/ops/utils/activation.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/operator.h" +#include "micro/model/argument.h" + +namespace micro { +namespace ops { + +Activation::Activation() : type_(TYPE_COUNT) {} + +MaceStatus Activation::Init(const framework::Operator *op) { + const char *atcivation_type = reinterpret_cast( + op->GetRepeatArgByName("activation")); + if (atcivation_type == NULL) { + atcivation_type = "NOOP"; + } + const float max_limit = op->GetArgByName("max_limit", 0.0f); + const float leakyrelu_coefficient = + op->GetArgByName("leakyrelu_coefficient", 0.0f); + + return Init(atcivation_type, max_limit, leakyrelu_coefficient); +} + +MaceStatus Activation::Init(const char *type, const float limit, + const float leakyrelu_coefficient) { + type_ = StringToActivationType(type); + limit_ = limit; + leakyrelu_coefficient_ = leakyrelu_coefficient; + + return MACE_SUCCESS; +} + +ActivationType Activation::GetActivationType() { + MACE_ASSERT1(type_ != TYPE_COUNT, "Activation should init first."); + return type_; +} + +MaceStatus Activation::Compute(const mifloat *input_ptr, + const int32_t size, mifloat *output_ptr) { + MACE_ASSERT1(type_ != TYPE_COUNT, "Activation should init first."); + switch (type_) { + case RELU: { + for (int32_t i = 0; i < size; ++i) { + *output_ptr++ = base::max(0.f, *input_ptr++); + } + break; + } + case RELUX: { + for (int32_t i = 0; i < size; ++i) { + *output_ptr++ = base::max(0.f, base::min(limit_, *input_ptr++)); + } + break; + } + case LEAKYRELU: { + for (int32_t i = 0; i < size; ++i) { + float input = *input_ptr; + *output_ptr = base::max(input, 0.f) + + base::min(input, 0.f) * leakyrelu_coefficient_; // NOLINT + ++input_ptr; + ++output_ptr; + } + break; + } + case TANH: { + for (int32_t i = 0; i < size; ++i) { + *output_ptr++ = base::tanh(*input_ptr++); + } + break; + } + case SIGMOID: { + for (int32_t i = 0; i < size; ++i) { + *output_ptr++ = 1 / (1 + base::exp(-(*input_ptr++))); + } + break; + } + case NOOP: { + break; + } + default: { + MACE_NOT_IMPLEMENTED; + } + } + + return MACE_SUCCESS; +} + +ActivationType Activation::StringToActivationType(const char *type) { + if (base::strcmp(type, "RELU") == 0) { + return RELU; + } else if (base::strcmp(type, "RELUX") == 0) { + return RELUX; + } else if (base::strcmp(type, "PRELU") == 0) { + return PRELU; + } else if (base::strcmp(type, "TANH") == 0) { + return TANH; + } else if (base::strcmp(type, "SIGMOID") == 0) { + return SIGMOID; + } else if (base::strcmp(type, "NOOP") == 0) { + return NOOP; + } else if (base::strcmp(type, "LEAKYRELU") == 0) { + return LEAKYRELU; + } else { + LOG(FATAL) << "Unknown activation type: " << type; + } + return NOOP; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/utils/activation.h b/micro/ops/utils/activation.h new file mode 100644 index 00000000..f2786157 --- /dev/null +++ b/micro/ops/utils/activation.h @@ -0,0 +1,65 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_UTILS_ACTIVATION_H_ +#define MICRO_OPS_UTILS_ACTIVATION_H_ + +#include "micro/base/types.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace framework { +class Operator; +} // namespace framework + +namespace ops { + +enum ActivationType { + NOOP = 0, + RELU = 1, + RELUX = 2, + PRELU = 3, + TANH = 4, + SIGMOID = 5, + LEAKYRELU = 6, + + TYPE_COUNT, +}; + +class Activation { + public: + Activation(); + ~Activation() {} + + MaceStatus Init(const framework::Operator *op); + MaceStatus Init(const char *type, const float limit, + const float leakyrelu_coefficient); + MaceStatus Compute(const mifloat *input_ptr, + const int32_t size, mifloat *output_ptr); + ActivationType GetActivationType(); + + private: + ActivationType StringToActivationType(const char *type); + + private: + ActivationType type_; + float limit_; + float leakyrelu_coefficient_; +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_UTILS_ACTIVATION_H_ diff --git a/micro/ops/utils/crumb_utils.cc b/micro/ops/utils/crumb_utils.cc new file mode 100644 index 00000000..ff2779ba --- /dev/null +++ b/micro/ops/utils/crumb_utils.cc @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/utils/crumb_utils.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { +namespace ops { +namespace crumb { + +MaceStatus ComputeBias(const mifloat *input, const int32_t *input_dims, + const uint32_t input_dim_size, const mifloat *bias, + const int32_t channel, mifloat *output) { + MACE_ASSERT(input != NULL && input_dims != NULL && input_dim_size > 0 + && bias != NULL && channel > 0 && output != NULL); + const int32_t outer_size = + base::accumulate_multi(input_dims, 0, input_dim_size - 1); + for (int32_t i = 0; i < outer_size; ++i) { + const int32_t outer_base = i * channel; + for (int32_t c = 0; c < channel; ++c) { + const int32_t idx = outer_base + c; + output[idx] = input[idx] + bias[c]; + } + } + return MACE_SUCCESS; +} + +} // namespace crumb +} // namespace ops +} // namespace micro diff --git a/micro/ops/utils/crumb_utils.h b/micro/ops/utils/crumb_utils.h new file mode 100644 index 00000000..56e7ad5d --- /dev/null +++ b/micro/ops/utils/crumb_utils.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
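
The `Activation` helper above turns the model's activation string into an element-wise function applied over a buffer. For reference, the formulas it implements (RELU, RELUX, LEAKYRELU, TANH, SIGMOID; NOOP leaves the buffer untouched) as a standalone sketch in plain C++, without the MACE base helpers:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Element-wise formulas used by Activation::Compute above.
static float Relu(float x) { return std::max(0.0f, x); }
static float Relux(float x, float limit) {       // clamp to [0, limit]
  return std::max(0.0f, std::min(limit, x));
}
static float LeakyRelu(float x, float alpha) {   // alpha = leakyrelu_coefficient
  return std::max(x, 0.0f) + alpha * std::min(x, 0.0f);
}
static float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

int main() {
  const float x = -1.5f;
  std::printf("relu=%g relux6=%g leaky(0.1)=%g tanh=%g sigmoid=%g\n",
              Relu(x), Relux(x, 6.0f), LeakyRelu(x, 0.1f),
              std::tanh(x), Sigmoid(x));
  return 0;
}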
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_UTILS_CRUMB_UTILS_H_ +#define MICRO_OPS_UTILS_CRUMB_UTILS_H_ + +#include "micro/base/types.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace ops { +namespace crumb { + +MaceStatus ComputeBias(const mifloat *input, const int32_t *input_dims, + const uint32_t input_dim_size, + const mifloat *bias, const int32_t channel, + mifloat *output); + +} // crumb +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_UTILS_CRUMB_UTILS_H_ diff --git a/micro/ops/utils/gemm.cc b/micro/ops/utils/gemm.cc new file mode 100644 index 00000000..a1494efa --- /dev/null +++ b/micro/ops/utils/gemm.cc @@ -0,0 +1,199 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/utils/gemm.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +#ifndef MICRO_NOT_OPT +MaceStatus Gemm::Compute(const mifloat *lhs_data, + const mifloat *rhs_data, + const int32_t batch, + const int32_t rows, + const int32_t cols, + const int32_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data) { + for (int32_t b = 0; b < batch; ++b) { + MatrixMap + lhs_matrix + (lhs_data + static_cast(lhs_batched) * b * rows * depth, + lhs_major, + rows, + depth); + MatrixMap + rhs_matrix + (rhs_data + static_cast(rhs_batched) * b * depth * cols, + rhs_major, + depth, + cols); + MatrixMap + output_matrix(output_data + b * rows * cols, output_major, rows, cols); + + const int32_t rows_4 = rows / 4 * 4; + const int32_t cols_4 = cols / 4 * 4; + for (int32_t r = 0; r < rows; r += 4) { + if (r < rows_4) { + int32_t ro[4] = {r, r + 1, r + 2, r + 3}; + for (int32_t c = 0; c < cols; c += 4) { + if (c < cols_4) { + float sum[16] = {0}; + int32_t co[4] = {c, c + 1, c + 2, c + 3}; + for (int32_t d = 0; d < depth; ++d) { + float lhs0 = lhs_matrix(ro[0], d); + float lhs1 = lhs_matrix(ro[1], d); + float lhs2 = lhs_matrix(ro[2], d); + float lhs3 = lhs_matrix(ro[3], d); + float rhs0 = rhs_matrix(d, co[0]); + float rhs1 = rhs_matrix(d, co[1]); + float rhs2 = rhs_matrix(d, co[2]); + float rhs3 = rhs_matrix(d, co[3]); + sum[0] += lhs0 * rhs0; + sum[1] += lhs0 * rhs1; + sum[2] += lhs0 * rhs2; + sum[3] += lhs0 * rhs3; + sum[4] += lhs1 * rhs0; + sum[5] += lhs1 * rhs1; + sum[6] += lhs1 * rhs2; + sum[7] += lhs1 * rhs3; + sum[8] += lhs2 * rhs0; + sum[9] += lhs2 * rhs1; + sum[10] += lhs2 * rhs2; + sum[11] += lhs2 * rhs3; + sum[12] += lhs3 * rhs0; + sum[13] += lhs3 * rhs1; + sum[14] += lhs3 * rhs2; + sum[15] += lhs3 * rhs3; + } // d + for (int32_t ro_i = 0; ro_i < 4; ++ro_i) { + int32_t ro_i_base = ro_i * 4; + for (int32_t co_i = 0; co_i < 4; ++co_i) { + *output_matrix.data(ro[ro_i], co[co_i]) = sum[ro_i_base + co_i]; + } + } + } else { + for (int32_t ro = r; ro < r + 4; ++ro) { + for 
(int32_t co = cols_4; co < cols; ++co) { + float sum = 0; + for (int32_t d = 0; d < depth; ++d) { + sum += lhs_matrix(ro, d) * rhs_matrix(d, co); + } // d + *output_matrix.data(ro, co) = sum; + } + } + } + } // c + } else { + for (int32_t ro = rows_4; ro < rows; ++ro) { + for (int32_t c = 0; c < cols; ++c) { + float sum = 0; + for (int32_t d = 0; d < depth; ++d) { + sum += lhs_matrix(ro, d) * rhs_matrix(d, c); + } // d + *output_matrix.data(ro, c) = sum; + } // c + } + } + } // r + } // b + + return MACE_SUCCESS; +} +#else +MaceStatus Gemm::Compute(const mifloat *lhs_data, + const mifloat *rhs_data, + const int32_t batch, + const int32_t rows, + const int32_t cols, + const int32_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data) { + for (int32_t b = 0; b < batch; ++b) { + MatrixMap + lhs_matrix + (lhs_data + static_cast(lhs_batched) * b * rows * depth, + lhs_major, + rows, + depth); + MatrixMap + rhs_matrix + (rhs_data + static_cast(rhs_batched) * b * depth * cols, + rhs_major, + depth, + cols); + MatrixMap + output_matrix(output_data + b * rows * cols, output_major, rows, cols); + + for (int32_t r = 0; r < rows; ++r) { + for (int32_t c = 0; c < cols; ++c) { + float sum = 0; + for (int32_t d = 0; d < depth; ++d) { + sum += lhs_matrix(r, d) * rhs_matrix(d, c); + } // d + + *output_matrix.data(r, c) = sum; + } // c + } // r + } // b + + return MACE_SUCCESS; +} +#endif + +MaceStatus Gemm::Compute(const mifloat *lhs, + const mifloat *rhs, + const int32_t batch, + const int32_t lhs_rows, + const int32_t lhs_cols, + const int32_t rhs_rows, + const int32_t rhs_cols, + const bool transpose_lhs, + const bool transpose_rhs, + const bool transpose_out, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data) { + int32_t rows = transpose_lhs ? lhs_cols : lhs_rows; + int32_t depth = transpose_lhs ? lhs_rows : lhs_cols; + int32_t cols = transpose_rhs ? rhs_rows : rhs_cols; + MACE_ASSERT1(depth == (transpose_rhs ? rhs_cols : rhs_rows), + "Matrices that multiply have inconsistent depth dim: "); + + return Compute(lhs, + rhs, + batch, + rows, + cols, + depth, + transpose_lhs ? ColMajor : RowMajor, + transpose_rhs ? ColMajor : RowMajor, + transpose_out ? ColMajor : RowMajor, + lhs_batched, + rhs_batched, + output_data); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/utils/gemm.h b/micro/ops/utils/gemm.h new file mode 100644 index 00000000..d1e1e3ac --- /dev/null +++ b/micro/ops/utils/gemm.h @@ -0,0 +1,82 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
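
The optimized `Gemm::Compute` above walks the output in 4x4 tiles so that four rows of the left-hand matrix and four columns of the right-hand matrix are reused across sixteen accumulators on every pass over `depth`; the `#else` branch is the plain triple loop it has to agree with. A standalone reference GEMM with explicit row-/column-major indexing (illustrative only, plain C++):

#include <cstdio>

enum Major { kRowMajor, kColMajor };

// Linear offset of element (r, c) in a rows x cols matrix stored as `major`.
static int At(Major major, int rows, int cols, int r, int c) {
  return major == kRowMajor ? r * cols + c : c * rows + r;
}

// Reference GEMM: out (rows x cols) = lhs (rows x depth) * rhs (depth x cols).
static void ReferenceGemm(const float *lhs, Major lhs_major,
                          const float *rhs, Major rhs_major,
                          int rows, int cols, int depth,
                          float *out, Major out_major) {
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      float sum = 0.0f;
      for (int d = 0; d < depth; ++d) {
        sum += lhs[At(lhs_major, rows, depth, r, d)] *
               rhs[At(rhs_major, depth, cols, d, c)];
      }
      out[At(out_major, rows, cols, r, c)] = sum;
    }
  }
}

int main() {
  // 2x3 times 3x2, everything row-major.
  const float a[] = {1, 2, 3, 4, 5, 6};
  const float b[] = {7, 8, 9, 10, 11, 12};
  float c[4];
  ReferenceGemm(a, kRowMajor, b, kRowMajor, 2, 2, 3, c, kRowMajor);
  std::printf("%g %g / %g %g\n", c[0], c[1], c[2], c[3]);  // 58 64 / 139 154
  return 0;
}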
+ +#ifndef MICRO_OPS_UTILS_GEMM_H_ +#define MICRO_OPS_UTILS_GEMM_H_ + +#include "micro/base/types.h" +#include "micro/include/public/micro.h" +#include "micro/ops/utils/matrix.h" + +namespace micro { +namespace ops { + +template +class Gemm { + public: + Gemm() {} + ~Gemm() {} + MaceStatus Compute(const mifloat *lhs_data, + const mifloat *rhs_data, + const int32_t batch, + const int32_t rows, + const int32_t cols, + const int32_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + T *output_data); +}; + +template<> +class Gemm { + public: + Gemm() {} + ~Gemm() {} + MaceStatus Compute(const mifloat *lhs_data, + const mifloat *rhs_data, + const int32_t batch, + const int32_t rows, + const int32_t cols, + const int32_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data); + // Original matrix before transpose has row-major + MaceStatus Compute( + const mifloat *lhs_data, + const mifloat *rhs_data, + const int32_t batch, + const int32_t lhs_rows, + const int32_t lhs_cols, + const int32_t rhs_rows, + const int32_t rhs_cols, + const bool transpose_lhs, + const bool transpose_rhs, + const bool transpose_out, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_UTILS_GEMM_H_ diff --git a/micro/ops/utils/gemv.cc b/micro/ops/utils/gemv.cc new file mode 100644 index 00000000..1fc81c47 --- /dev/null +++ b/micro/ops/utils/gemv.cc @@ -0,0 +1,131 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/utils/gemv.h" + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +MaceStatus Gemv::Compute(const mifloat *lhs_data, + const mifloat *rhs_data, + const mifloat *bias_data, + const int32_t batch, + const int32_t lhs_height, + const int32_t lhs_width, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data) { + if (lhs_height == 1) { + for (int32_t b = 0; b < batch; ++b) { + const int32_t lhs_b_base = static_cast(lhs_batched) * b; + const int32_t rhs_b_base = + static_cast(rhs_batched) * b * lhs_width; + float sum = bias_data != NULL ? bias_data[0] : 0.0f; + const int32_t lhs_h_base = lhs_b_base * lhs_width; + for (int32_t w = 0; w < lhs_width; ++w) { + sum += lhs_data[lhs_h_base + w] * rhs_data[rhs_b_base + w]; + } // w + output_data[lhs_b_base] = sum; + } // b + } else if (lhs_height == 2) { + for (int32_t b = 0; b < batch; ++b) { + const int32_t lhs_b_base = + static_cast(lhs_batched) * b * 2; + const int32_t rhs_b_base = + static_cast(rhs_batched) * b * lhs_width; + + float sum0 = bias_data != NULL ? bias_data[0] : 0.0f; + float sum1 = bias_data != NULL ? 
bias_data[1] : 0.0f; + const int32_t lhs_h_base0 = lhs_b_base * lhs_width; + const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width; + for (int32_t w = 0; w < lhs_width; ++w) { + float rhs_data_value = rhs_data[rhs_b_base + w]; + sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value; + sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value; + } // w + output_data[lhs_b_base] = sum0; + output_data[lhs_b_base + 1] = sum1; + } // b + } else if (lhs_height == 3) { + for (int32_t b = 0; b < batch; ++b) { + const int32_t lhs_b_base = + static_cast(lhs_batched) * b * 2; + const int32_t rhs_b_base = + static_cast(rhs_batched) * b * lhs_width; + + float sum0 = bias_data != NULL ? bias_data[0] : 0.0f; + float sum1 = bias_data != NULL ? bias_data[1] : 0.0f; + float sum2 = bias_data != NULL ? bias_data[2] : 0.0f; + const int32_t lhs_h_base0 = lhs_b_base * lhs_width; + const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width; + const int32_t lhs_h_base2 = lhs_h_base1 + lhs_width; + for (int32_t w = 0; w < lhs_width; ++w) { + float rhs_data_value = rhs_data[rhs_b_base + w]; + sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value; + sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value; + sum2 += lhs_data[lhs_h_base2 + w] * rhs_data_value; + } // w + output_data[lhs_b_base] = sum0; + output_data[lhs_b_base + 1] = sum1; + output_data[lhs_b_base + 2] = sum2; + } // b + } else { // lhs_height >= 4 + int32_t lhs_height_end = lhs_height - 4; + for (int32_t b = 0; b < batch; ++b) { + const int32_t lhs_b_base = + static_cast(lhs_batched) * b * lhs_height; + const int32_t rhs_b_base = + static_cast(rhs_batched) * b * lhs_width; + for (int32_t h = 0; h < lhs_height; h += 4) { + if (h > lhs_height_end) { + h = lhs_height_end; + } + float sum0 = 0; + float sum1 = 0; + float sum2 = 0; + float sum3 = 0; + if (bias_data != NULL) { + sum0 = bias_data[0]; + sum1 = bias_data[1]; + sum2 = bias_data[2]; + sum3 = bias_data[3]; + } + const int32_t lhs_h_base0 = (lhs_b_base + h) * lhs_width; + const int32_t lhs_h_base1 = lhs_h_base0 + lhs_width; + const int32_t lhs_h_base2 = lhs_h_base1 + lhs_width; + const int32_t lhs_h_base3 = lhs_h_base2 + lhs_width; + for (int32_t w = 0; w < lhs_width; ++w) { + float rhs_data_value = rhs_data[rhs_b_base + w]; + + sum0 += lhs_data[lhs_h_base0 + w] * rhs_data_value; + sum1 += lhs_data[lhs_h_base1 + w] * rhs_data_value; + sum2 += lhs_data[lhs_h_base2 + w] * rhs_data_value; + sum3 += lhs_data[lhs_h_base3 + w] * rhs_data_value; + } // w + + output_data[lhs_b_base + h] = sum0; + output_data[lhs_b_base + h + 1] = sum1; + output_data[lhs_b_base + h + 2] = sum2; + output_data[lhs_b_base + h + 3] = sum3; + } // h + } // b + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/utils/gemv.h b/micro/ops/utils/gemv.h new file mode 100644 index 00000000..9fdb75d9 --- /dev/null +++ b/micro/ops/utils/gemv.h @@ -0,0 +1,64 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
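
For `lhs_height >= 4`, `Gemv::Compute` above processes output rows four at a time and shifts the final block back so it always spans four valid rows, at the cost of recomputing a few rows near the tail. The loop pattern in isolation (standalone sketch, plain C++):

#include <cstdio>

// Row-blocking pattern used by Gemv::Compute for lhs_height >= 4: the last
// block is moved back to lhs_height - 4 so no block reads past the end.
int main() {
  const int rows = 10;
  const int last_start = rows - 4;
  for (int h = 0; h < rows; h += 4) {
    if (h > last_start) h = last_start;          // shift the final block back
    std::printf("block [%d, %d)\n", h, h + 4);   // prints [0,4) [4,8) [6,10)
  }
  return 0;
}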
+ +#ifndef MICRO_OPS_UTILS_GEMV_H_ +#define MICRO_OPS_UTILS_GEMV_H_ + +#include "micro/base/types.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace ops { + +template +class Gemv { + public: + Gemv() {} + ~Gemv() {} + // Always row-major after transpose + MaceStatus Compute( + const T *lhs_data, + const T *rhs_data, + const T *bias_data, + const int32_t batch, + const int32_t lhs_height, + const int32_t lhs_width, + const bool lhs_batched, + const bool rhs_batched, + T *output_data); +}; + +template<> +class Gemv { + public: + Gemv() {} + ~Gemv() {} + // Always row-major after transpose + MaceStatus Compute( + const mifloat *lhs_data, + const mifloat *rhs_data, + const mifloat *bias_data, + const int32_t batch, + const int32_t lhs_height, + const int32_t lhs_width, + const bool lhs_batched, + const bool rhs_batched, + mifloat *output_data); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_UTILS_GEMV_H_ diff --git a/micro/ops/utils/matrix.h b/micro/ops/utils/matrix.h new file mode 100644 index 00000000..8942588b --- /dev/null +++ b/micro/ops/utils/matrix.h @@ -0,0 +1,109 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MICRO_OPS_UTILS_MATRIX_H_ +#define MICRO_OPS_UTILS_MATRIX_H_ + +#include "micro/base/logging.h" + +namespace micro { +namespace ops { + +enum MatrixMajor { + RowMajor, + ColMajor +}; + +inline MatrixMajor TransposeMatrixMajor(const MatrixMajor src_major) { + return src_major == RowMajor ? ColMajor : RowMajor; +} + +template +class MatrixMap { + public: + MatrixMap() + : data_(NULL), + matrix_major_(RowMajor), + rows_(0), + cols_(0), + stride_(0) {} + MatrixMap(T *data, + const MatrixMajor matrix_major, + const int32_t rows, + const int32_t cols) : + data_(data), + matrix_major_(matrix_major), + rows_(rows), + cols_(cols), + stride_(matrix_major == ColMajor ? rows : cols) {} + MatrixMap(T *data, + const MatrixMajor matrix_major, + const int32_t rows, + const int32_t cols, + const int32_t stride) : + data_(data), + matrix_major_(matrix_major), + rows_(rows), + cols_(cols), + stride_(stride) {} + MatrixMap(const MatrixMap &other) + : data_(other.data_), + matrix_major_(other.matrix_major_), + rows_(other.rows_), + cols_(other.cols_), + stride_(other.stride_) {} + + MatrixMajor matrix_major() const { return matrix_major_; } + int32_t rows() const { return rows_; } + int32_t cols() const { return cols_; } + int32_t stride() const { return stride_; } + int32_t rows_stride() const { + return matrix_major_ == ColMajor ? 1 : stride_; + } + int32_t cols_stride() const { + return matrix_major_ == RowMajor ? 
1 : stride_; + } + int32_t size() const { return rows_ * cols_; } + T *data() const { return data_; } + T *data(int32_t rows, int32_t cols) const { + return data_ + rows * rows_stride() + cols * cols_stride(); + } + T &operator()(int32_t row, int32_t col) const { return *data(row, col); } + MatrixMap block(int32_t start_row, int32_t start_col, int32_t block_rows, + int32_t block_cols) const { + MACE_ASSERT(start_row >= 0); + MACE_ASSERT(start_row + block_rows <= rows_); + MACE_ASSERT(start_col >= 0); + MACE_ASSERT(start_col + block_cols <= cols_); + + return MatrixMap(data(start_row, start_col), + matrix_major_, + block_rows, + block_cols, + stride_); + } + + private: + T *data_; + MatrixMajor matrix_major_; + int32_t rows_; + int32_t cols_; + int32_t stride_; +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_UTILS_MATRIX_H_ diff --git a/micro/port/BUILD.bazel b/micro/port/BUILD.bazel new file mode 100644 index 00000000..eb2eec02 --- /dev/null +++ b/micro/port/BUILD.bazel @@ -0,0 +1,27 @@ +package( + default_visibility = ["//visibility:public"], +) + +load( + "//micro:micro.bzl", + "if_hexagon_enabled", +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "port", + srcs = glob(["*.cc"]), + hdrs = glob(["*.h"]), + copts = [ + "-Werror", + "-Wextra", + ] + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), + deps = if_hexagon_enabled([ + "@hexagon_sdk//:headers_incs", + "@hexagon_sdk//:headers_incs_stddef", + "@hexagon_tools//:headers_tools_target", + ]), +) diff --git a/micro/port/api.cc b/micro/port/api.cc new file mode 100644 index 00000000..b63ab07a --- /dev/null +++ b/micro/port/api.cc @@ -0,0 +1,59 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + +#include "micro/port/api.h" + +#include +#include +#ifdef MACE_ENABLE_HEXAGON +#include +#include +#else +#include +#endif + +namespace micro { +namespace port { +namespace api { + +void DebugLog(const char *str) { + // you should rewrite this file in the platform source file. +#ifdef MACE_ENABLE_HEXAGON + FARF(ALWAYS, "%s", str); +#else + printf("%s", str); +#endif +} + +int64_t NowMicros() { + // you should rewrite this file in the platform source file. +#ifdef MACE_ENABLE_HEXAGON + return HAP_perf_get_time_us(); +#else + struct timeval tv; + gettimeofday(&tv, 0); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#endif +} + +void Abort() { + // you should rewrite this file in the platform source file. + abort(); +} + +} // namespace api +} // namespace port +} // namespace micro diff --git a/micro/port/api.h b/micro/port/api.h new file mode 100644 index 00000000..ba1a300c --- /dev/null +++ b/micro/port/api.h @@ -0,0 +1,32 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_PORT_API_H_ +#define MICRO_PORT_API_H_ + +#include + +namespace micro { +namespace port { +namespace api { + +void DebugLog(const char *str); +int64_t NowMicros(); +void Abort(); + +} // api +} // namespace port +} // namespace micro + +#endif // MICRO_PORT_API_H_ diff --git a/micro/test/ccbaseline/BUILD.bazel b/micro/test/ccbaseline/BUILD.bazel new file mode 100644 index 00000000..b203e3b8 --- /dev/null +++ b/micro/test/ccbaseline/BUILD.bazel @@ -0,0 +1,109 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//micro:micro.bzl", + "if_hexagon_enabled", + "if_not_hexagon_enabled", +) + +MACEMC_IDL_FILES = [ + "macemc/rpc/macemc.idl", +] + +MACEMC_IDL_HEADERS = [ + "codegen/macemc.h", +] + +MACEMC_IDL_SKELS = [ + "codegen/macemc_skel.c", +] + +MACEMC_IDL_STUBS = [ + "codegen/macemc_stub.c", +] + +genrule( + name = "macemc_idl_gen", + srcs = MACEMC_IDL_FILES, + outs = MACEMC_IDL_HEADERS + MACEMC_IDL_SKELS + MACEMC_IDL_STUBS, + cmd = "bash $(location //micro/test/ccutils:qaic) $(@D)/codegen $(SRCS)", + tools = ["//micro/test/ccutils:qaic"], +) + +cc_library( + name = "macemc_idl_skel", + srcs = MACEMC_IDL_SKELS, + hdrs = MACEMC_IDL_HEADERS, + copts = [ + "-Werror", + "-std=c99", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "@hexagon_sdk//:headers_dsp", + ], + alwayslink = True, +) + +cc_binary( + name = "libmacemc_skel.so", + srcs = glob(["macemc/rpc/skel/*.cc"]), + linkshared = True, + deps = [ + ":macemc_idl_skel", + "//micro/codegen:micro_engine", + "//micro/include", + "//micro/test/ccutils:rpc_skel", + "@hexagon_sdk//:headers_dsp", + ], +) + +cc_library( + name = "macemc_idl_stub", + srcs = MACEMC_IDL_STUBS, + hdrs = MACEMC_IDL_HEADERS, + copts = [ + "-Werror", + "-std=c99", + "-Wextra", + "-Wno-missing-field-initializers", + ], + deps = [ + "@hexagon_sdk//:sdk_arm", + ], + alwayslink = True, +) + +cc_library( + name = "macemc_stub", + srcs = glob(["macemc/rpc/stub/*.cc"]), + hdrs = glob(["macemc/rpc/stub/*.h"]), + strip_include_prefix = "", + deps = [ + ":macemc_idl_stub", + "//micro/test/ccutils:rpc_stub", + ], + alwayslink = True, +) + +cc_test( + name = "micro_cc_baseline", + srcs = glob([ + "test_baseline_main.cc", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + "-DMACE_ENABLE_HEXAGON", + ], + linkstatic = 1, + deps = [ + ":macemc_stub", + ], +) diff --git a/micro/test/ccbaseline/macemc/rpc/macemc.idl b/micro/test/ccbaseline/macemc/rpc/macemc.idl new file mode 100755 index 00000000..34ec038a --- /dev/null +++ b/micro/test/ccbaseline/macemc/rpc/macemc.idl @@ -0,0 +1,20 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "remote.idl" + +interface macemc : remote_handle64 { + long run(); +}; diff --git a/micro/test/ccbaseline/macemc/rpc/skel/macemc.cc b/micro/test/ccbaseline/macemc/rpc/skel/macemc.cc new file mode 100644 index 00000000..7ee9c06c --- /dev/null +++ b/micro/test/ccbaseline/macemc/rpc/skel/macemc.cc @@ -0,0 +1,28 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "rpc/skel/base_func.h" + +#include + +extern void MaceMcRun(); + +extern "C" { +MACE_DEFINE_RANDOM_INPUT(macemc) +int macemc_run(remote_handle64 h) { + MaceMcRun(); + FARF(ALWAYS, "run end, h=%d", h); + return 0; +} +} // extern "C" diff --git a/micro/test/ccbaseline/macemc/rpc/skel/micro_tester.cc b/micro/test/ccbaseline/macemc/rpc/skel/micro_tester.cc new file mode 100644 index 00000000..88d92937 --- /dev/null +++ b/micro/test/ccbaseline/macemc/rpc/skel/micro_tester.cc @@ -0,0 +1,88 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
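The macemc.idl interface above is compiled by the qaic genrule in the BUILD file into stub and skel C sources. As a rough, hypothetical sketch, the generated header is expected to expose prototypes along these lines; the exact names and signatures come from the Hexagon SDK tooling and may differ.

#include <remote.h>  // Hexagon SDK header assumed to provide remote_handle64

// Hypothetical shape of the qaic-generated API for
// "interface macemc : remote_handle64 { long run(); }".
extern "C" {
int macemc_open(const char *uri, remote_handle64 *handle);  // open a DSP session
int macemc_close(remote_handle64 handle);                   // close the session
int macemc_run(remote_handle64 handle);                     // the IDL's "long run()"
}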
+ +#include + +#include "micro/include/public/micro.h" +#include "rpc/skel/base_func.h" + +#ifndef MICRO_MODEL_NAME +#error Please specify model name in the command +#endif + +namespace micro { + +namespace MICRO_MODEL_NAME { +MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); +} // namespace MICRO_MODEL_NAME + +namespace port { +namespace api { +int64_t NowMicros(); +} // namespace api +} // namespace port + +namespace testing { + +namespace { +const int32_t kMicroRunTestTimes = 10; +const int32_t input0_shape[4] = {1, 1, 128, 9}; +const int32_t input_length = 1 * 1 * 128 * 9; +float input0[input_length] = {0}; +} // namespace + +void MicroRunModel() { + int64_t t0 = port::api::NowMicros(); + MaceMicroEngine *micro_engine = NULL; + MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine); + int64_t t1 = port::api::NowMicros(); + double init_millis = (t1 - t0) / 1000.0; + FARF(ALWAYS, "Total init latency: %fms", init_millis); + + if (micro_engine == NULL) { + FARF(ALWAYS, "GetMicroEngineSingleton failed"); + return; + } + + rpc::skel::FillRandomValue(input0, input_length * sizeof(float)); + micro_engine->RegisterInputData(0, input0, input0_shape); + + // warm up + t0 = port::api::NowMicros(); + if (micro_engine->Run() != MACE_SUCCESS) { + FARF(ALWAYS, "warm up error"); + return; + } else { + t1 = port::api::NowMicros(); + double run_millis = (t1 - t0) / 1000.0; + FARF(ALWAYS, "run latency for cold start: %fms", run_millis); + } + + // run + t0 = port::api::NowMicros(); + for (int32_t i = 0; i < kMicroRunTestTimes; ++i) { + micro_engine->Run(); + } + t1 = port::api::NowMicros(); + + double run_millis = (t1 - t0) / kMicroRunTestTimes / 1000.0; + FARF(ALWAYS, "run latency: %fms", run_millis); +} + +} // namespace testing +} // namespace micro + +void MaceMcRun() { + micro::testing::MicroRunModel(); +} diff --git a/micro/test/ccbaseline/macemc/rpc/stub/macemc.cc b/micro/test/ccbaseline/macemc/rpc/stub/macemc.cc new file mode 100644 index 00000000..d23d17d4 --- /dev/null +++ b/micro/test/ccbaseline/macemc/rpc/stub/macemc.cc @@ -0,0 +1,40 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "macemc/rpc/stub/macemc.h" +#include "micro/test/ccbaseline/codegen/macemc.h" + +namespace micro { +namespace testing { + +namespace { +const char kMaceMcUri[] = macemc_URI"&_dom=sdsp"; +} // namespace + +MaceMc::MaceMc() : + rpc::stub::BaseHandle(macemc_open, macemc_close, kMaceMcUri) {} + +void MaceMc::Run() { + macemc_run(remote_handle_); +} + +} // namespace testing +} // namespace micro + +void MaceMcBaselineRun() { + micro::testing::MaceMc mace_mc; + mace_mc.Open(); + mace_mc.Run(); + mace_mc.Close(); +} diff --git a/micro/test/ccbaseline/macemc/rpc/stub/macemc.h b/micro/test/ccbaseline/macemc/rpc/stub/macemc.h new file mode 100644 index 00000000..51725c6f --- /dev/null +++ b/micro/test/ccbaseline/macemc/rpc/stub/macemc.h @@ -0,0 +1,36 @@ +// Copyright 2020 The MICRO Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_ +#define MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_ + +#include "rpc/stub/base_handle.h" + +namespace micro { +namespace testing { + +class MaceMc : public rpc::stub::BaseHandle { + public: + MaceMc(); + + void Run(); +}; + +} // namespace testing +} // namespace micro + +void MaceMcBaselineRun(); + +#endif // MICRO_TEST_CCBASELINE_MACEMC_RPC_STUB_MACEMC_H_ diff --git a/micro/test/ccbaseline/test_baseline_main.cc b/micro/test/ccbaseline/test_baseline_main.cc new file mode 100644 index 00000000..21e371a6 --- /dev/null +++ b/micro/test/ccbaseline/test_baseline_main.cc @@ -0,0 +1,22 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
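For comparison with the MaceMc wrapper above, the same open/run/close lifecycle can be written directly against the generated C API. This is an editorial sketch using the names that appear in the stub code above; error handling is omitted.

#include "micro/test/ccbaseline/codegen/macemc.h"

int RunBaselineDirect() {
  // URI as composed in the stub above: macemc_URI "&_dom=sdsp" (sensor DSP).
  static const char kUri[] = macemc_URI "&_dom=sdsp";
  remote_handle64 handle = 0;
  if (macemc_open(kUri, &handle) != 0) {
    return -1;  // failed to open a FastRPC session
  }
  macemc_run(handle);   // invokes MaceMcRun() inside libmacemc_skel.so on the DSP
  macemc_close(handle);
  return 0;
}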
+ +void MaceMcBaselineRun(); + +int main(int argc, char *argv[]) { + (void) (argc); + (void) (argv); + MaceMcBaselineRun(); + return 0; +} diff --git a/micro/test/ccbenchmark/BUILD.bazel b/micro/test/ccbenchmark/BUILD.bazel new file mode 100644 index 00000000..164b8b16 --- /dev/null +++ b/micro/test/ccbenchmark/BUILD.bazel @@ -0,0 +1,149 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//micro:micro.bzl", + "if_hexagon_enabled", + "if_not_hexagon_enabled", +) + +IDL_FILES = [ + "micro/rpc/benchmark.idl", +] + +IDL_HEADERS = [ + "codegen/benchmark.h", +] + +IDL_SKELS = [ + "codegen/benchmark_skel.c", +] + +IDL_STUBS = [ + "codegen/benchmark_stub.c", +] + +genrule( + name = "idl_gen", + srcs = IDL_FILES, + outs = IDL_HEADERS + IDL_SKELS + IDL_STUBS, + cmd = "bash $(location //micro/test/ccutils:qaic) $(@D)/codegen $(SRCS)", + tools = ["//micro/test/ccutils:qaic"], +) + +cc_library( + name = "benchmark_idl_skel", + srcs = IDL_SKELS, + hdrs = IDL_HEADERS, + deps = [ + "@hexagon_sdk//:headers_dsp", + ], + alwayslink = True, +) + +cc_binary( + name = "libbenchmark_skel.so", + srcs = glob(["micro/rpc/skel/*.c"]), + deps = [ + ":benchmark_idl_skel", + ":benchmark_lib", + ":benchmark_utils", + "//micro/test/ccutils:rpc_skel", + "@hexagon_sdk//:headers_dsp", + ], + linkshared = True, + linkstatic = 0, +) + +cc_library( + name = "benchmark_idl_stub", + srcs = IDL_STUBS, + hdrs = IDL_HEADERS, + deps = [ + "@hexagon_sdk//:sdk_arm", + ], + alwayslink = True, +) + +cc_library( + name = "benchmark_stub", + srcs = glob(["micro/rpc/stub/*.cc"]), + hdrs = glob(["micro/rpc/stub/*.h"]), + strip_include_prefix = "", + deps = [ + ":benchmark_idl_stub", + "//micro/test/ccutils:rpc_stub", + ], + alwayslink = True, +) + +cc_library( + name = "benchmark_utils", + srcs = glob([ + "micro/benchmark_utils/*.cc", + ]), + hdrs = glob([ + "micro/benchmark_utils/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + strip_include_prefix = "", + deps = [ + "//micro/base", + "//micro/test/ccutils", + ], + alwayslink = True, +) + +cc_library( + name = "benchmark_lib", + srcs = glob([ + "micro/ops/*.cc", + "micro/ops/nhwc/*.cc", + ]), + hdrs = glob([ + "micro/benchmark_utils/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), + strip_include_prefix = "", + deps = [ + "benchmark_utils", + "//micro/ops:ops_for_test", + "//micro/test/ccutils", + ], + alwayslink = True, +) + +cc_test( + name = "micro_cc_benchmark", + srcs = glob( + [ + "micro/test_benchmark_main.cc", + ], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), + linkstatic = 1, + deps = if_hexagon_enabled([ + ":benchmark_stub", + ]) + if_not_hexagon_enabled([ + ":benchmark_lib", + ]), +) diff --git a/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.cc b/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.cc new file mode 100644 index 00000000..6d23340d --- /dev/null +++ b/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.cc @@ -0,0 +1,190 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/benchmark_utils/test_benchmark.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/common/global_buffer.h" +#include "micro/port/api.h" + +namespace micro { +namespace base { +template +char *ToString(T value, char *buffer, char *end); +template<> +char *ToString(float value, char *buffer, char *end); +template<> +char *ToString(int32_t value, char *buffer, char *end); +template<> +char *ToString(int64_t value, char *buffer, char *end); +} // namespace base + +namespace testing { +namespace { +const int32_t kMaxBenchmarkNum = 200; + +const int32_t kNameWidth = 50 + 1; +const int32_t kInt64ValueBufferLength = 21; +const int32_t kInt32ValueBufferLength = 12; +const int32_t kFloatValueBufferLength = 21; +void GetFixWidthStr(const char *input, char *output, const int32_t fix_width) { + int32_t length = micro::base::strlen(input); + if (length >= fix_width) { + micro::base::memcpy(output, input, fix_width * sizeof(char)); + } else { + micro::base::memcpy(output, input, length * sizeof(char)); + while (length < fix_width) { + output[length++] = ' '; + } + } + output[fix_width] = '\0'; +} + +void GetFixWidthStr(int32_t input, char *output, const int32_t fix_width) { + char int_str[kInt32ValueBufferLength] = {0}; + micro::base::ToString(input, int_str, int_str + kInt32ValueBufferLength); + GetFixWidthStr(int_str, output, fix_width); +} + +void GetFixWidthStr(int64_t input, char *output, const int32_t fix_width) { + char int_str[kInt64ValueBufferLength] = {0}; + micro::base::ToString(input, int_str, int_str + kInt64ValueBufferLength); + GetFixWidthStr(int_str, output, fix_width); +} + +void GetFixWidthStr(float input, char *output, const int32_t fix_width) { + char int_str[kFloatValueBufferLength] = {0}; + micro::base::ToString(input, int_str, int_str + kFloatValueBufferLength); + GetFixWidthStr(int_str, output, fix_width); +} + +Benchmark *all_benchmarks[kMaxBenchmarkNum] = {NULL}; +int32_t benchmark_size = 0; +int64_t bytes_processed; +int64_t macs_processed = 0; +int64_t accum_time = 0; +int64_t start_time = 0; + +} // namespace + +Benchmark::Benchmark(const char *name, BenchmarkFunc *benchmark_func) + : name_(name), benchmark_func_(benchmark_func) { + Register(); +} + +void Benchmark::Run() { + LOG(INFO) << "Benchmark::Run start, benchmark_size=" << benchmark_size; + if (benchmark_size == 0) { + return; + } + + char benchmark_name[kNameWidth] = {0}; + GetFixWidthStr("Benchmark", benchmark_name, kNameWidth - 1); + char time_name[kInt64ValueBufferLength] = {0}; + GetFixWidthStr("Time(ns)", time_name, kInt64ValueBufferLength - 1); + char iterations_name[kInt32ValueBufferLength] = {0}; + GetFixWidthStr("Iterations", iterations_name, kInt32ValueBufferLength - 1); + char input_mb_name[kFloatValueBufferLength] = {0}; + GetFixWidthStr("Input(MB/s)", input_mb_name, kFloatValueBufferLength - 1); + LOG(CLEAN) << benchmark_name << "\t" << time_name << "\t" << iterations_name + << "\t" << input_mb_name << "\t" << "GMACPS"; + LOG(CLEAN) << "--------------------------------------------------------------" + 
"-------------------------------------------------------------"; + + for (int32_t i = 0; i < benchmark_size; ++i) { + Benchmark *b = all_benchmarks[i]; + int32_t iters; + double seconds; + b->Run(&iters, &seconds); + float mbps = (bytes_processed * 1e-6) / seconds; + // MACCs or other computations + float gmacs = (macs_processed * 1e-9) / seconds; + int64_t ns = static_cast(seconds * 1e9); + + char name_str[kNameWidth] = {0}; + GetFixWidthStr(b->name_, name_str, kNameWidth - 1); + char ns_str[kInt64ValueBufferLength] = {0}; + GetFixWidthStr(ns / iters, ns_str, kInt64ValueBufferLength - 1); + char iters_str[kInt32ValueBufferLength] = {0}; + GetFixWidthStr(iters, iters_str, kInt32ValueBufferLength - 1); + char mbps_str[kFloatValueBufferLength] = {0}; + GetFixWidthStr(mbps, mbps_str, kFloatValueBufferLength - 1); + char gmacs_str[kInt32ValueBufferLength] = {0}; + if (gmacs != 0) { + GetFixWidthStr(gmacs, gmacs_str, kInt32ValueBufferLength - 1); + } else { + gmacs_str[0] = '-'; + } + LOG(CLEAN) << name_str << "\t" << ns_str << "\t" + << iters_str << "\t" << mbps_str << "\t" << gmacs_str; + } +} + +void Benchmark::Register() { + MACE_ASSERT2(benchmark_size < kMaxBenchmarkNum, + "benchmark_size is:", benchmark_size); + all_benchmarks[benchmark_size++] = this; +} + +void Benchmark::Run(int32_t *run_count, double *run_seconds) { + static const int32_t kMinIters = 10; + static const int32_t kMaxIters = 10000; + static const double kMinTime = 0.5; + int32_t iters = kMinIters; + while (true) { + bytes_processed = -1; + macs_processed = 0; + common::test::GetGlobalBuffer()->reset(); + RestartTiming(); + (*benchmark_func_)(iters); + StopTiming(); + const double seconds = accum_time * 1e-6; + if (seconds >= kMinTime || iters >= kMaxIters) { + *run_count = iters; + *run_seconds = seconds; + return; + } + + // Update number of iterations. + // Overshoot by 100% in an attempt to succeed the next time. + double multiplier = 2.0 * kMinTime / base::max(seconds, 1e-9); + iters = base::min(multiplier * iters, kMaxIters); // NOLINT + } +} + +void BytesProcessed(int64_t n) { bytes_processed = n; } +void MacsProcessed(int64_t n) { macs_processed = n; } +void RestartTiming() { + accum_time = 0; + start_time = port::api::NowMicros(); +} +void StartTiming() { + start_time = port::api::NowMicros(); +} +void StopTiming() { + if (start_time != 0) { + accum_time += (port::api::NowMicros() - start_time); + start_time = 0; + } +} + +} // namespace testing +} // namespace micro + +extern "C" { +void BenchmarkRun() { + micro::testing::Benchmark::Run(); +} +} diff --git a/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.h b/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.h new file mode 100644 index 00000000..5ce2ea33 --- /dev/null +++ b/micro/test/ccbenchmark/micro/benchmark_utils/test_benchmark.h @@ -0,0 +1,56 @@ +// Copyright 2019 The MICRO Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Simple benchmarking facility. 
+#ifndef MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_ +#define MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_ + +#include + +#define MICRO_BENCHMARK(n) \ + static ::micro::testing::Benchmark __benchmark_##n(#n, (n)) + +namespace micro { +namespace testing { + +typedef void BenchmarkFunc(int32_t iters); + +class Benchmark { + public: + Benchmark(const char *name, BenchmarkFunc *benchmark_func); + + static void Run(); + + private: + const char *name_; + BenchmarkFunc *benchmark_func_; + + void Register(); + void Run(int32_t *run_count, double *run_seconds); +}; + +void BytesProcessed(int64_t); +void MacsProcessed(int64_t); +void RestartTiming(); +void StartTiming(); +void StopTiming(); + +} // namespace testing +} // namespace micro + +extern "C" { +void BenchmarkRun(); +} + +#endif // MICRO_TEST_CCBENCHMARK_MICRO_BENCHMARK_UTILS_TEST_BENCHMARK_H_ diff --git a/micro/test/ccbenchmark/micro/ops/activation_benchmark.cc b/micro/test/ccbenchmark/micro/ops/activation_benchmark.cc new file mode 100644 index 00000000..d37d3a86 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/activation_benchmark.cc @@ -0,0 +1,103 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
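Before the operator benchmarks that follow, here is a deliberately minimal registration example (editorial sketch, not in the patch) showing how the MICRO_BENCHMARK macro and the helpers declared above fit together.

#include <stdint.h>
#include "micro/benchmark_utils/test_benchmark.h"

// A trivial benchmark: the registered function receives the iteration count
// chosen by Benchmark::Run() and must execute the measured work iters times.
// Benchmarks that need setup bracket it with StopTiming()/StartTiming(), as
// the operator benchmarks below do.
static void MICRO_BM_MEMSET_1KB(int32_t iters) {
  static char buffer[1024];
  micro::testing::BytesProcessed(static_cast<int64_t>(iters) * 1024);
  while (iters--) {
    for (int32_t i = 0; i < 1024; ++i) {
      buffer[i] = static_cast<char>(i);
    }
  }
}
MICRO_BENCHMARK(MICRO_BM_MEMSET_1KB);
// All registered benchmarks are then executed through the C entry point:
//   extern "C" void BenchmarkRun();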
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/activation.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +namespace { +void ActivationBenchmark(const char *activation_type, int iters, + const float *input, const int32_t *input_dims, + float *output, int32_t *output_dims) { + micro::testing::StopTiming(); + + const uint32_t arg_type_len = base::strlen(activation_type); + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + MACE_DEFINE_RANDOM_INPUT(float, alpha, input_dims[3]); + if (base::strcmp(activation_type, "PRELU") == 0) { + substitude_op.AddInput(alpha, input_dims + 3, 1); + } + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 5; ++i) { + activation_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + activation_op.Run(); + } +} +} // namespace + +#define MICRO_BM_ACTIVATION_MACRO(N, H, W, C, TYPE) \ + static void MICRO_BM##_##TYPE##_##N##_##H##_##W##_##C(int32_t iters) { \ + const int32_t buffer_length = N * H * W * C; \ + MACE_DEFINE_RANDOM_INPUT(float, input, buffer_length); \ + MACE_DEFINE_RANDOM_INPUT(float, input, buffer_length); \ + float *output = \ + common::test::GetGlobalBuffer()->GetBuffer(buffer_length);\ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t output_dims[4] = {0}; \ + const int64_t tot = static_cast(iters) * buffer_length; \ + micro::testing::BytesProcessed(tot *(sizeof(float))); \ + ActivationBenchmark(#TYPE, iters, input, \ + input_dims, output, output_dims); \ + } \ + MICRO_BENCHMARK(MICRO_BM##_##TYPE##_##N##_##H##_##W##_##C) + +#define MICRO_BM_RELU(N, H, W, C) \ + MICRO_BM_ACTIVATION_MACRO(N, H, W, C, RELU) + +MICRO_BM_RELU(1, 4, 4, 1); +MICRO_BM_RELU(1, 128, 128, 1); + +#define MICRO_BM_RELUX(N, H, W, C) \ + MICRO_BM_ACTIVATION_MACRO(N, H, W, C, RELUX) + +MICRO_BM_RELUX(1, 4, 4, 1); +MICRO_BM_RELUX(1, 128, 128, 1); + +#define MICRO_BM_PRELU(N, H, W, C) \ + MICRO_BM_ACTIVATION_MACRO(N, H, W, C, PRELU) + +MICRO_BM_PRELU(1, 4, 4, 1); +MICRO_BM_PRELU(1, 128, 128, 1); + +#define MICRO_BM_TANH(N, H, W, C) \ + MICRO_BM_ACTIVATION_MACRO(N, H, W, C, TANH) + +MICRO_BM_TANH(1, 4, 4, 1); +MICRO_BM_TANH(1, 128, 128, 1); + +#define MICRO_BM_SIGMOID(N, H, W, C) \ + MICRO_BM_ACTIVATION_MACRO(N, H, W, C, SIGMOID) + +MICRO_BM_SIGMOID(1, 4, 4, 1); +MICRO_BM_SIGMOID(1, 128, 128, 1); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/bias_add_benchmark.cc b/micro/test/ccbenchmark/micro/ops/bias_add_benchmark.cc new file mode 100644 index 00000000..9e83df9b --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/bias_add_benchmark.cc @@ -0,0 +1,79 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/bias_add.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +namespace { +template +void BiasAdd(int32_t iters, const int32_t N, + const int32_t H, const int32_t W, const int32_t C) { + micro::testing::StopTiming(); + + BiasAddOp bias_add_op; + framework::SubstituteOp substitude_op; + const int32_t input_length = N * H * W * C; + MACE_DEFINE_RANDOM_INPUT(T, input, input_length); + MACE_DEFINE_RANDOM_INPUT(T, bias, C); + T *output = common::test::GetGlobalBuffer()->GetBuffer(input_length); + int32_t input_dims[] = {N, H, W, C}; + int32_t output_dims[4] = {0}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(bias, input_dims + 3, 1) + .AddOutput(output, output_dims, 4); + bias_add_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + bias_add_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + bias_add_op.Run(); + } +} +} // namespace + +#define MICRO_BM_BIAS_ADD_MACRO(N, H, W, C, TYPE) \ + static void MICRO_BM_BIAS_ADD_##N##_##H##_##W##_##C##_##TYPE( \ + int32_t iters) { \ + const int64_t tot = static_cast(iters) * N * H * W * C; \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BiasAdd(iters, N, H, W, C); \ + } \ + MICRO_BENCHMARK(MICRO_BM_BIAS_ADD_##N##_##H##_##W##_##C##_##TYPE) + +#define MICRO_BM_BIAS_ADD(N, H, W, C) \ + MICRO_BM_BIAS_ADD_MACRO(N, H, W, C, float) + +MICRO_BM_BIAS_ADD(1, 128, 128, 1); +MICRO_BM_BIAS_ADD(1, 128, 128, 3); +MICRO_BM_BIAS_ADD(1, 64, 64, 3); +MICRO_BM_BIAS_ADD(1, 56, 56, 16); +MICRO_BM_BIAS_ADD(1, 28, 28, 32); +MICRO_BM_BIAS_ADD(1, 14, 14, 128); +MICRO_BM_BIAS_ADD(1, 14, 14, 256); +MICRO_BM_BIAS_ADD(1, 7, 7, 1024); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/eltwise_benchmark.cc b/micro/test/ccbenchmark/micro/ops/eltwise_benchmark.cc new file mode 100644 index 00000000..4bb3bf0e --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/eltwise_benchmark.cc @@ -0,0 +1,83 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/eltwise.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void EltwiseBenchmark(int32_t iters, eltwise::Type type, const int32_t N, + const int32_t H, const int32_t W, const int32_t C) { + micro::testing::StopTiming(); + + EltwiseOp eltwise_op; + framework::SubstituteOp substitude_op; + const int32_t input_length = N * H * W * C; + MACE_DEFINE_RANDOM_INPUT(T, input0, input_length); + MACE_DEFINE_RANDOM_INPUT(T, input1, input_length); + T *output = common::test::GetGlobalBuffer()->GetBuffer(input_length); + int32_t input_dims[] = {N, H, W, C}; + int32_t output_dims[4] = {0}; + T coeffs[] = {1.2, 2.1}; + substitude_op.AddInput(input0, input_dims, 4) + .AddInput(input1, input_dims, 4) + .AddArg("type", static_cast(type)) + .AddRepeatArg("coeff", coeffs, sizeof(coeffs) / sizeof(T)) + .AddOutput(output, output_dims, 4); + eltwise_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + eltwise_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + eltwise_op.Run(); + } +} +} // namespace + +#define MICRO_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE) \ + static void \ + MICRO_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE( \ + int32_t iters) { \ + const int64_t tot = static_cast(iters) * N * H * W * C; \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + EltwiseBenchmark( \ + iters, static_cast(ELT_TYPE), N, H, W, C); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE) + +#define MICRO_BM_ELTWISE(ELT_TYPE, N, H, W, C) \ + MICRO_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float) + +MICRO_BM_ELTWISE(2, 1, 32, 32, 8); +MICRO_BM_ELTWISE(2, 1, 60, 60, 16); +MICRO_BM_ELTWISE(2, 1, 64, 64, 8); +MICRO_BM_ELTWISE(0, 1, 32, 32, 8); +MICRO_BM_ELTWISE(0, 1, 60, 60, 16); +MICRO_BM_ELTWISE(5, 1, 32, 32, 8); +MICRO_BM_ELTWISE(5, 1, 60, 60, 16); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/matmul_benchmark.cc b/micro/test/ccbenchmark/micro/ops/matmul_benchmark.cc new file mode 100644 index 00000000..17f48023 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/matmul_benchmark.cc @@ -0,0 +1,134 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/matmul.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void MatMulBenchmark(int32_t iters, const int32_t N, + const int32_t H, const int32_t C, const int32_t OW) { + micro::testing::StopTiming(); + + MatMulOp matmul_op; + framework::SubstituteOp substitude_op; + const int32_t input0_length = N * H * C; + MACE_DEFINE_RANDOM_INPUT(T, input0, input0_length); + const int32_t input1_length = N * C * OW; + MACE_DEFINE_RANDOM_INPUT(T, input1, input1_length); + const int32_t output_length = N * H * OW; + T *output = common::test::GetGlobalBuffer()->GetBuffer(output_length); + int32_t input0_dims[] = {N, H, C}; + int32_t input1_dims[] = {N, C, OW}; + int32_t output_dims[3] = {0}; + substitude_op.AddInput(input0, input0_dims, 3) + .AddInput(input1, input1_dims, 3) + .AddOutput(output, output_dims, 3); + matmul_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + matmul_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + matmul_op.Run(); + } +} + +template +void MatMulTransposeBenchmark(int32_t iters, const int32_t N, const int32_t H, + const int32_t C, const int32_t OW) { + micro::testing::StopTiming(); + + MatMulOp matmul_op; + framework::SubstituteOp substitude_op; + const int32_t input0_length = N * H * C; + MACE_DEFINE_RANDOM_INPUT(T, input0, input0_length); + const int32_t input1_length = N * OW * C; + MACE_DEFINE_RANDOM_INPUT(T, input1, input1_length); + const int32_t output_length = N * H * OW; + T *output = common::test::GetGlobalBuffer()->GetBuffer(output_length); + int32_t input0_dims[] = {N, H, C}; + int32_t input1_dims[] = {N, OW, C}; + int32_t output_dims[3] = {0}; + substitude_op.AddInput(input0, input0_dims, 3) + .AddInput(input1, input1_dims, 3) + .AddArg("transpose_b", 1) + .AddOutput(output, output_dims, 3); + matmul_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + matmul_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + matmul_op.Run(); + } +} + +} // namespace + +#define MICRO_BM_MATMUL_MACRO(N, H, C, W, TYPE) \ + static void MICRO_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE( \ + int32_t iters) { \ + const int64_t macs = N * H * W * C; \ + const int64_t tot = static_cast(iters) * N * (C * H + H * W); \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + MatMulBenchmark(iters, N, H, C, W); \ + } \ + MICRO_BENCHMARK(MICRO_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE) + +#define MICRO_BM_MATMUL_OP(N, H, C, W) \ + MICRO_BM_MATMUL_MACRO(N, H, C, W, float) + +MICRO_BM_MATMUL_OP(1, 300, 32, 1); +MICRO_BM_MATMUL_OP(1, 32, 64, 32); +MICRO_BM_MATMUL_OP(2, 16, 16, 49); +MICRO_BM_MATMUL_OP(3, 16, 16, 49); +MICRO_BM_MATMUL_OP(4, 16, 16, 49); +MICRO_BM_MATMUL_OP(4, 8, 32, 49); +MICRO_BM_MATMUL_OP(4, 32, 32, 49); + +#define MICRO_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE) \ + static void MICRO_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE( \ + int32_t iters) { \ + const int64_t macs = N * H * W * C; \ + const int64_t tot = static_cast(iters) * N * (C * H + H * W); \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + MatMulBenchmark(iters, N, H, C, W); \ + } \ + MICRO_BENCHMARK(MICRO_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE) + +#define MICRO_BM_MATMUL_TRANSPOSE(N, H, 
C, W) \ + MICRO_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float) + +MICRO_BM_MATMUL_TRANSPOSE(4, 8, 32, 49); +MICRO_BM_MATMUL_TRANSPOSE(2, 16, 16, 49); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/batch_norm_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/batch_norm_benchmark.cc new file mode 100644 index 00000000..904ac740 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/batch_norm_benchmark.cc @@ -0,0 +1,86 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/batch_norm.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void BatchNorm(int iters, const int N, const int H, const int W, const int C) { + micro::testing::StopTiming(); + + BatchNormOp batch_norm_op; + framework::SubstituteOp substitude_op; + const int32_t input_length = N * H * W * C; + MACE_DEFINE_RANDOM_INPUT(T, input, input_length); + MACE_DEFINE_RANDOM_INPUT(T, scale, static_cast(C)); + MACE_DEFINE_RANDOM_INPUT(T, offset, static_cast(C)); + MACE_DEFINE_RANDOM_INPUT(T, mean, static_cast(C)); + MACE_DEFINE_RANDOM_INPUT(T, var, static_cast(C)); + T *output = common::test::GetGlobalBuffer()->GetBuffer(input_length); + int32_t input_dims[] = {N, H, W, C}; + int32_t other_dims[] = {C}; + int32_t output_dims[4] = {0}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(scale, other_dims, 1) + .AddInput(offset, other_dims, 1) + .AddInput(mean, other_dims, 1) + .AddInput(var, other_dims, 1) + .AddArg("epsilon", 1e-3) + .AddOutput(output, output_dims, 4); + batch_norm_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + batch_norm_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + batch_norm_op.Run(); + } +} +} // namespace + +#define MICRO_BM_BATCH_NORM_MACRO(N, C, H, W, TYPE) \ + static void MICRO_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE( \ + int32_t iters) { \ + const int64_t tot = static_cast(iters) * N * H * W * C; \ + micro::testing::MacsProcessed(tot); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BatchNorm(iters, N, H, W, C); \ + } \ + MICRO_BENCHMARK(MICRO_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE) + +#define MICRO_BM_BATCH_NORM(N, C, H, W) \ + MICRO_BM_BATCH_NORM_MACRO(N, C, H, W, float); + +MICRO_BM_BATCH_NORM(1, 128, 128, 1); +MICRO_BM_BATCH_NORM(1, 128, 128, 3); +MICRO_BM_BATCH_NORM(1, 64, 64, 3); +MICRO_BM_BATCH_NORM(1, 56, 56, 16); +MICRO_BM_BATCH_NORM(1, 28, 28, 64); +MICRO_BM_BATCH_NORM(1, 14, 14, 64); +MICRO_BM_BATCH_NORM(1, 14, 14, 32); +MICRO_BM_BATCH_NORM(1, 7, 7, 1024); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_benchmark.cc new file mode 100644 
index 00000000..69a6caf1 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_benchmark.cc @@ -0,0 +1,112 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/conv_2d_ref.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void Conv2d(int iters, + const T *input, const int32_t *input_dims, + const T *filter, const int32_t *filter_dims, + const T *bias, T *output, int32_t *output_dims, + int32_t stride, int32_t dilation, Padding padding) { + micro::testing::StopTiming(); + + Conv2dRefOp conv2d_op; + framework::SubstituteOp substitude_op; + int32_t strides[] = {stride, stride}; + int32_t dilations[] = {dilation, dilation}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, filter_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", static_cast(padding)) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + conv2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + conv2d_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + conv2d_op.Run(); + } +} +} // namespace + +#define MICRO_BM_CONV_2D_MACRO(\ + N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \ + static void \ + MICRO_BM_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\ + DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \ + const int32_t input_length = N * H * W * C; \ + const int64_t tot = static_cast(iters) * input_length; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ + const int64_t macs = N * oh * ow * OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \ + const int32_t filter_length = OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \ + MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \ + const int32_t output_length = N * H * W * OC; \ + TYPE *output = \ + common::test::GetGlobalBuffer()->GetBuffer(output_length); \ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t filter_dims[] = {OC, KH, KW, C}; \ + int32_t output_dims[4] = {0}; \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Conv2d(iters, input, input_dims, \ + filter, filter_dims, bias, output, \ + output_dims, STRIDE, DILATION, P); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\ + DILATION##_##P##_##OC##_##TYPE) + +#define MICRO_BM_CONV_2D(N, H, W, C, KH, KW, S, D, P, OC) \ + 
MICRO_BM_CONV_2D_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float) + +MICRO_BM_CONV_2D(1, 32, 32, 64, 1, 1, 1, 1, VALID, 32); +MICRO_BM_CONV_2D(1, 33, 31, 64, 1, 1, 1, 1, VALID, 32); +MICRO_BM_CONV_2D(1, 32, 32, 64, 3, 3, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 33, 31, 64, 3, 3, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 32, 64, 5, 5, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 5, 5, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 15, 1, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 1, 15, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 1, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 2, 1, SAME, 32); +MICRO_BM_CONV_2D(1, 32, 31, 64, 7, 7, 3, 1, SAME, 32); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_opt_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_opt_benchmark.cc new file mode 100644 index 00000000..331198f1 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/conv_2d_opt_benchmark.cc @@ -0,0 +1,112 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/conv_2d_c4_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void Conv2dOpt(int iters, + const T *input, const int32_t *input_dims, + const T *filter, const int32_t *filter_dims, + const T *bias, T *output, int32_t *output_dims, + int32_t stride, int32_t dilation, Padding padding) { + micro::testing::StopTiming(); + + Conv2dC4S4Op conv2d_opt_op; + framework::SubstituteOp substitude_op; + int32_t strides[] = {stride, stride}; + int32_t dilations[] = {dilation, dilation}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, filter_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", static_cast(padding)) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + conv2d_opt_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + conv2d_opt_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + conv2d_opt_op.Run(); + } +} +} // namespace + +#define MICRO_BM_CONV_2D_OPT_MACRO(\ + N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \ + static void \ + MICRO_BM_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##D##\ + DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \ + const int32_t input_length = N * H * W * C; \ + const int64_t tot = static_cast(iters) * input_length; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ + const int64_t macs 
= N * oh * ow * OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \ + const int32_t filter_length = OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \ + MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \ + const int32_t output_length = N * H * W * OC; \ + TYPE *output = \ + common::test::GetGlobalBuffer()->GetBuffer(output_length); \ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t filter_dims[] = {OC, KH, KW, C}; \ + int32_t output_dims[4] = {0}; \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Conv2dOpt(iters, input, input_dims, \ + filter, filter_dims, bias, output, \ + output_dims, STRIDE, DILATION, P); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##STRIDE##\ + D##DILATION##_##P##_##OC##_##TYPE) + +#define MICRO_BM_CONV_2D_OPT(N, H, W, C, KH, KW, S, D, P, OC) \ + MICRO_BM_CONV_2D_OPT_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float) + +MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 1, 1, 1, 1, VALID, 32); +MICRO_BM_CONV_2D_OPT(1, 33, 31, 64, 1, 1, 1, 1, VALID, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 3, 3, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 33, 31, 64, 3, 3, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 5, 5, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 5, 5, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 15, 1, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 1, 15, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 1, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 2, 1, SAME, 32); +MICRO_BM_CONV_2D_OPT(1, 32, 31, 64, 7, 7, 3, 1, SAME, 32); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_benchmark.cc new file mode 100644 index 00000000..6cfa087d --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_benchmark.cc @@ -0,0 +1,112 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
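To make the macro arithmetic in the conv benchmarks above concrete, here is one worked instance (editorial, using the same formulas) for MICRO_BM_CONV_2D_OPT(1, 32, 32, 64, 3, 3, 1, 1, SAME, 32):

#include <stdint.h>

// Worked instance of the output-size and MAC-count formulas used above.
const int64_t kPadH = 3 / 2;  // = 1 for SAME padding with a 3x3 kernel
const int64_t kPadW = 3 / 2;  // = 1
const int64_t kOutH = (32 + 2 * kPadH - 3 - (3 - 1) * (1 - 1)) / 1 + 1;  // = 32
const int64_t kOutW = (32 + 2 * kPadW - 3 - (3 - 1) * (1 - 1)) / 1 + 1;  // = 32
const int64_t kMacs = 1 * kOutH * kOutW * 32 * 3 * 3 * 64;  // = 18,874,368 (~18.9 MMACs per run)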
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/depthwise_conv_2d_ref.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void Conv2d(int iters, + const T *input, const int32_t *input_dims, + const T *filter, const int32_t *filter_dims, + const T *bias, T *output, int32_t *output_dims, + int32_t stride, int32_t dilation, Padding padding) { + micro::testing::StopTiming(); + + DepthwiseConv2dRefOp depthwise_conv2d_op; + framework::SubstituteOp substitude_op; + int32_t strides[] = {stride, stride}; + int32_t dilations[] = {dilation, dilation}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, filter_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", static_cast(padding)) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + depthwise_conv2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + depthwise_conv2d_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + depthwise_conv2d_op.Run(); + } +} +} // namespace + +#define MICRO_BM_DEPTHWISE_CONV_2D_MACRO(\ + N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \ + static void \ + MICRO_BM_DEPTHWISE_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\ + STRIDE##D##DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \ + const int32_t input_length = N * H * W * C; \ + const int64_t tot = static_cast(iters) * input_length; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ + const int64_t macs = N * oh * ow * OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \ + const int32_t filter_length = OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \ + MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \ + const int32_t output_length = N * H * W * OC; \ + TYPE *output = \ + common::test::GetGlobalBuffer()->GetBuffer(output_length); \ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t filter_dims[] = {OC, KH, KW, C}; \ + int32_t output_dims[4] = {0}; \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Conv2d(iters, input, input_dims, \ + filter, filter_dims, bias, output, \ + output_dims, STRIDE, DILATION, P); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_DEPTHWISE_CONV_2D_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\ + STRIDE##D##DILATION##_##P##_##OC##_##TYPE) + +#define MICRO_BM_DEPTHWISE_CONV_2D(N, H, W, C, KH, KW, S, D, P, OC) \ + MICRO_BM_DEPTHWISE_CONV_2D_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float) + +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 1, 1, 1, 1, VALID, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 9, 7, 32, 1, 1, 1, 1, VALID, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 3, 3, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 9, 7, 32, 3, 3, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 8, 32, 5, 5, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 5, 5, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 15, 1, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 1, 15, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 1, 1, SAME, 1); 
+MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 2, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D(1, 8, 7, 32, 7, 7, 3, 1, SAME, 1); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_opt_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_opt_benchmark.cc new file mode 100644 index 00000000..95f68f73 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/depthwise_conv_2d_opt_benchmark.cc @@ -0,0 +1,114 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void DepthwiseConv2dOpt(int iters, + const T *input, const int32_t *input_dims, + const T *filter, const int32_t *filter_dims, + const T *bias, T *output, int32_t *output_dims, + int32_t stride, int32_t dilation, Padding padding) { + micro::testing::StopTiming(); + + DepthwiseConv2dKB1S4Op depthwise_conv2d_opt_op; + framework::SubstituteOp substitude_op; + int32_t strides[] = {stride, stride}; + int32_t dilations[] = {dilation, dilation}; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, filter_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", static_cast(padding)) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + depthwise_conv2d_opt_op.Init( + NULL, reinterpret_cast(&substitude_op), + NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + depthwise_conv2d_opt_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + depthwise_conv2d_opt_op.Run(); + } +} +} // namespace + +#define MICRO_BM_DEPTHWISE_CONV_2D_OPT_MACRO(\ + N, H, W, C, KH, KW, STRIDE, DILATION, P, OC, TYPE) \ + static void \ + MICRO_BM_DEPTHWISE_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##S##\ + STRIDE##D##DILATION##_##P##_##OC##_##TYPE(int32_t iters) { \ + const int32_t input_length = N * H * W * C; \ + const int64_t tot = static_cast(iters) * input_length; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ + const int64_t macs = N * oh * ow * OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \ + const int32_t filter_length = OC * KH * KW * C; \ + MACE_DEFINE_RANDOM_INPUT(TYPE, filter, filter_length); \ + MACE_DEFINE_RANDOM_INPUT(TYPE, bias, (int32_t)OC); \ + const int32_t output_length = N * H * W * OC; \ + TYPE *output = \ + 
common::test::GetGlobalBuffer()->GetBuffer(output_length); \ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t filter_dims[] = {OC, KH, KW, C}; \ + int32_t output_dims[4] = {0}; \ + micro::testing::MacsProcessed(macs); \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + DepthwiseConv2dOpt(iters, input, input_dims, \ + filter, filter_dims, bias, output, \ + output_dims, STRIDE, DILATION, P); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_DEPTHWISE_CONV_2D_OPT_##N##_##H##_##W##_##C##_K##KH##x##KW##\ + S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE) + +#define MICRO_BM_DEPTHWISE_CONV_2D_OPT(N, H, W, C, KH, KW, S, D, P, OC) \ + MICRO_BM_DEPTHWISE_CONV_2D_OPT_MACRO(N, H, W, C, KH, KW, S, D, P, OC, float) + +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 1, 1, 1, 1, VALID, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 9, 7, 32, 1, 1, 1, 1, VALID, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 3, 3, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 9, 7, 32, 3, 3, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 8, 32, 5, 5, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 5, 5, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 15, 1, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 1, 15, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 1, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 2, 1, SAME, 1); +MICRO_BM_DEPTHWISE_CONV_2D_OPT(1, 8, 7, 32, 7, 7, 3, 1, SAME, 1); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/nhwc/pooling_benchmark.cc b/micro/test/ccbenchmark/micro/ops/nhwc/pooling_benchmark.cc new file mode 100644 index 00000000..96ae926e --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/nhwc/pooling_benchmark.cc @@ -0,0 +1,88 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
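+
+// Benchmarks the NHWC pooling kernels: a PoolingS4Op is driven through a
+// SubstituteOp carrying the raw input buffer plus the pooling_type, strides,
+// padding, kernels and dilations arguments, warmed up twice, and then timed
+// over `iters` runs. For example, MICRO_BM_POOLING(1, 129, 129, 3, 2, 2,
+// SAME, MAX) below registers a benchmark named
+// MICRO_BM_POOLING_1_129_129_3_K2S2_SAME_MAX_float.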
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/nhwc/pooling_ref.h" +#include "micro/ops/nhwc/pooling_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void Pooling(int iters, const T *input, const int32_t *input_dims, + T *output, int32_t *output_dims, int32_t kernel, + int32_t stride, Padding padding, PoolingType pooling_type) { + micro::testing::StopTiming(); + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + int32_t strides[] = {stride, stride}; + int32_t kernels[] = {kernel, kernel}; + int32_t dilations[] = {1, 1}; + substitude_op.AddInput(input, input_dims, 4) + .AddArg("pooling_type", static_cast(pooling_type)) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", static_cast(padding)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int i = 0; i < 2; ++i) { + pooling_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + pooling_op.Run(); + } +} +} // namespace + +#define MICRO_BM_POOLING_MACRO(N, H, W, C, KE, STRIDE, PA, PO, TYPE) \ + static void \ + MICRO_BM_POOLING_##N##_##H##_##W##_##C##_K##KE##S##STRIDE##_##PA##_\ + ##PO##_##TYPE(int32_t iters) { \ + const int32_t input_length = N * H * W * C; \ + const int64_t tot = static_cast(iters) * input_length; \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + MACE_DEFINE_RANDOM_INPUT(TYPE, input, input_length); \ + const int32_t output_length = input_length; \ + TYPE *output = \ + common::test::GetGlobalBuffer()->GetBuffer(output_length); \ + int32_t input_dims[] = {N, H, W, C}; \ + int32_t output_dims[4] = {0}; \ + Pooling(iters, input, input_dims, \ + output, output_dims, KE, STRIDE, PA, PO); \ + } \ + MICRO_BENCHMARK( \ + MICRO_BM_POOLING_##N##_##H##_##W##_##C##_K##KE##S##STRIDE##_##PA##_\ + ##PO##_##TYPE) + +#define MICRO_BM_POOLING(N, H, W, C, K, S, PA, PO) \ + MICRO_BM_POOLING_MACRO(N, H, W, C, K, S, PA, PO, float) + +MICRO_BM_POOLING(1, 129, 129, 3, 2, 2, SAME, MAX); +MICRO_BM_POOLING(1, 65, 65, 3, 2, 2, SAME, MAX); +MICRO_BM_POOLING(1, 48, 64, 8, 48, 64, VALID, AVG); +MICRO_BM_POOLING(1, 7, 7, 8, 7, 1, VALID, AVG); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/reduce_benchmark.cc b/micro/test/ccbenchmark/micro/ops/reduce_benchmark.cc new file mode 100644 index 00000000..4b3df637 --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/reduce_benchmark.cc @@ -0,0 +1,76 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
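+
+// Benchmarks ReduceOp reducing a random N x H x W x C input over the spatial
+// axes {1, 2}, with two warm-up runs before timing. BytesProcessed is
+// reported as iters * N * H * W * C * sizeof(float); for example,
+// MICRO_BM_REDUCE(1, 128, 128, 1) streams 64 KB of input per iteration.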
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/reduce.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { +namespace { +template +void Reduce(int32_t iters, const int32_t N, + const int32_t H, const int32_t W, const int32_t C) { + micro::testing::StopTiming(); + + ReduceOp reduce_op; + framework::SubstituteOp substitude_op; + const int32_t input_length = N * H * W * C; + MACE_DEFINE_RANDOM_INPUT(T, input, input_length); + T *output = common::test::GetGlobalBuffer()->GetBuffer(input_length); + int32_t input_dims[] = {N, H, W, C}; + int32_t output_dims[4] = {0}; + int32_t axis[] = {1, 2}; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("axis", axis, sizeof(axis) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + reduce_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + reduce_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + reduce_op.Run(); + } +} +} // namespace + +#define MICRO_BM_REDUCE_MACRO(N, H, W, C, TYPE) \ + static void MICRO_BM_REDUCE_##N##_##H##_##W##_##C##_##TYPE( \ + int32_t iters) { \ + const int64_t tot = static_cast(iters) * N * H * W * C; \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Reduce(iters, N, H, W, C); \ + } \ + MICRO_BENCHMARK(MICRO_BM_REDUCE_##N##_##H##_##W##_##C##_##TYPE) + +#define MICRO_BM_REDUCE(N, H, W, C) \ + MICRO_BM_REDUCE_MACRO(N, H, W, C, float) + +MICRO_BM_REDUCE(1, 128, 128, 1); +MICRO_BM_REDUCE(4, 64, 64, 3); +MICRO_BM_REDUCE(2, 128, 128, 1); +MICRO_BM_REDUCE(2, 28, 28, 32); +MICRO_BM_REDUCE(1, 32, 32, 16); +MICRO_BM_REDUCE(1, 48, 64, 8); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/ops/softmax_benchmark.cc b/micro/test/ccbenchmark/micro/ops/softmax_benchmark.cc new file mode 100644 index 00000000..7c1fdbed --- /dev/null +++ b/micro/test/ccbenchmark/micro/ops/softmax_benchmark.cc @@ -0,0 +1,73 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
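+
+// Benchmarks SoftmaxOp on a random 4-D input: the helper builds the op
+// through a SubstituteOp, runs two warm-up passes, then times `iters` calls
+// to Run(). Only BytesProcessed is reported; no MAC count is recorded for
+// softmax.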
+ +#include "micro/benchmark_utils/test_benchmark.h" +#include "micro/ops/softmax.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +namespace { +template +void SoftmaxBenchmark(int32_t iters, const int32_t N, + const int32_t H, const int32_t W, const int32_t C) { + micro::testing::StopTiming(); + + SoftmaxOp softmax_op; + framework::SubstituteOp substitude_op; + const int32_t input_length = N * H * W * C; + MACE_DEFINE_RANDOM_INPUT(T, input, input_length); + T *output = common::test::GetGlobalBuffer()->GetBuffer(input_length); + int32_t input_dims[] = {N, H, W, C}; + int32_t output_dims[4] = {0}; + substitude_op.AddInput(input, input_dims, 4) + .AddOutput(output, output_dims, 4); + softmax_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + + // Warm-up + for (int32_t i = 0; i < 2; ++i) { + softmax_op.Run(); + } + + micro::testing::StartTiming(); + while (iters--) { + softmax_op.Run(); + } +} +} // namespace +#define MICRO_BM_SOFTMAX_MACRO(N, H, W, C, TYPE) \ + static void MICRO_BM_SOFTMAX_##N##_##H##_##W##_##C##_##TYPE( \ + int32_t iters) { \ + const int64_t tot = static_cast(iters) * N * H * W * C; \ + micro::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + SoftmaxBenchmark(iters, N, C, H, W); \ + } \ + MICRO_BENCHMARK(MICRO_BM_SOFTMAX_##N##_##H##_##W##_##C##_##TYPE) + +#define MICRO_BM_SOFTMAX(N, H, W, C) \ + MICRO_BM_SOFTMAX_MACRO(N, H, W, C, float) + +MICRO_BM_SOFTMAX(1, 64, 64, 2); +MICRO_BM_SOFTMAX(1, 64, 64, 3); +MICRO_BM_SOFTMAX(1, 32, 32, 4); +MICRO_BM_SOFTMAX(1, 16, 16, 10); +MICRO_BM_SOFTMAX(1, 7, 7, 128); + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccbenchmark/micro/rpc/benchmark.idl b/micro/test/ccbenchmark/micro/rpc/benchmark.idl new file mode 100755 index 00000000..aa1fb09b --- /dev/null +++ b/micro/test/ccbenchmark/micro/rpc/benchmark.idl @@ -0,0 +1,20 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "remote.idl" + +interface benchmark: remote_handle64 { + long run(); +}; diff --git a/micro/test/ccbenchmark/micro/rpc/skel/benchmark.c b/micro/test/ccbenchmark/micro/rpc/skel/benchmark.c new file mode 100644 index 00000000..23b023fb --- /dev/null +++ b/micro/test/ccbenchmark/micro/rpc/skel/benchmark.c @@ -0,0 +1,27 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
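+
+// Skeleton (device-side) implementation of the benchmark.idl interface:
+// benchmark_run() is the remote entry point and simply calls BenchmarkRun(),
+// which presumably drives the registered MICRO_BENCHMARK cases on the
+// Hexagon side; the matching host-side stub is added below in
+// micro/rpc/stub/benchmark.cc.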
+ +#include + +#include "rpc/skel/base_func.h" + +extern void BenchmarkRun(); + +MACE_DEFINE_RANDOM_INPUT(benchmark) + +int benchmark_run(remote_handle64 h) { + BenchmarkRun(); + FARF(ALWAYS, "run end, h=%d", h); + return 0; +} diff --git a/micro/test/ccbenchmark/micro/rpc/stub/benchmark.cc b/micro/test/ccbenchmark/micro/rpc/stub/benchmark.cc new file mode 100644 index 00000000..22745167 --- /dev/null +++ b/micro/test/ccbenchmark/micro/rpc/stub/benchmark.cc @@ -0,0 +1,40 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/rpc/stub/benchmark.h" +#include "micro/test/ccbenchmark/codegen/benchmark.h" + +namespace micro { +namespace testing { + +namespace { +const char kBenchmarkUri[] = benchmark_URI"&_dom=sdsp"; +} // namespace + +Benchmark::Benchmark() : + rpc::stub::BaseHandle(benchmark_open, benchmark_close, kBenchmarkUri) {} + +void Benchmark::Run() { + benchmark_run(remote_handle_); +} + +} // namespace testing +} // namespace micro + +void BenchmarkRun() { + micro::testing::Benchmark benchmark; + benchmark.Open(); + benchmark.Run(); + benchmark.Close(); +} diff --git a/micro/test/ccbenchmark/micro/rpc/stub/benchmark.h b/micro/test/ccbenchmark/micro/rpc/stub/benchmark.h new file mode 100644 index 00000000..4816a738 --- /dev/null +++ b/micro/test/ccbenchmark/micro/rpc/stub/benchmark.h @@ -0,0 +1,36 @@ +// Copyright 2018 The MICRO Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_ +#define MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_ + +#include "rpc/stub/base_handle.h" + +namespace micro { +namespace testing { + +class Benchmark : public rpc::stub::BaseHandle { + public: + Benchmark(); + + void Run(); +}; + +} // namespace testing +} // namespace micro + +void BenchmarkRun(); + +#endif // MICRO_TEST_CCBENCHMARK_MICRO_RPC_STUB_BENCHMARK_H_ diff --git a/micro/test/ccbenchmark/micro/test_benchmark_main.cc b/micro/test/ccbenchmark/micro/test_benchmark_main.cc new file mode 100644 index 00000000..2fcb858a --- /dev/null +++ b/micro/test/ccbenchmark/micro/test_benchmark_main.cc @@ -0,0 +1,27 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifdef MACE_ENABLE_HEXAGON +#include "micro/rpc/stub/benchmark.h" +#else +#include "micro/benchmark_utils/test_benchmark.h" +#endif + +int main(int argc, char *argv[]) { + (void) (argc); + (void) (argv); + BenchmarkRun(); + return 0; +} diff --git a/micro/test/ccunit/BUILD.bazel b/micro/test/ccunit/BUILD.bazel new file mode 100644 index 00000000..ac4d70d3 --- /dev/null +++ b/micro/test/ccunit/BUILD.bazel @@ -0,0 +1,53 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_test( + name = "micro_cc_test", + testonly = 1, + srcs = glob( + [ + "micro/model/*.cc", + "micro/framework/*.cc", + "micro/codegen/*.cc", + ], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + linkstatic = 1, + deps = [ + "//micro/base", + "//micro/codegen:generated_models", + "//micro/codegen:micro_engine", + "//micro/framework", + "@gtest//:gtest_main", + ], +) + +cc_test( + name = "micro_ops_test", + testonly = 1, + srcs = glob( + [ + "micro/ops/*.cc", + "micro/ops/nhwc/*.cc", + ], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + linkstatic = 1, + deps = [ + "//micro/base", + "//micro/ops:ops_for_test", + "//micro/test/ccutils:ccutils_with_gtest", + "@gtest//:gtest_main", + ], +) diff --git a/micro/test/ccunit/micro/codegen/engine_test.cc b/micro/test/ccunit/micro/codegen/engine_test.cc new file mode 100644 index 00000000..60f2841f --- /dev/null +++ b/micro/test/ccunit/micro/codegen/engine_test.cc @@ -0,0 +1,56 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
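+
+// Smoke test for the generated inference engine: it fetches the model's
+// MaceMicroEngine singleton, registers a zero-filled input of shape
+// {1, 1, 128, 9}, runs a forward pass, and reads back output 0 along with
+// its dims. The hard-coded input shape has to match the model selected
+// through MICRO_MODEL_NAME at build time.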
+
+
+#include <gtest/gtest.h>
+
+#include "micro/base/logging.h"
+#include "micro/include/public/micro.h"
+
+#ifndef MICRO_MODEL_NAME
+#error Please specify model name in the command
+#endif
+
+namespace micro {
+
+namespace MICRO_MODEL_NAME {
+MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
+}  // namespace MICRO_MODEL_NAME
+
+class EngineTest : public ::testing::Test {
+};
+
+void OutputAllInfo() {
+  MaceMicroEngine *micro_engine = NULL;
+  MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine)
+                  == MACE_SUCCESS && micro_engine != NULL);
+
+  float input_buffer[1 * 1 * 128 * 9] = {0};
+  int32_t input_shape[] = {1, 1, 128, 9};
+
+  micro_engine->RegisterInputData(0, input_buffer, input_shape);
+  MACE_ASSERT(MACE_SUCCESS == micro_engine->Run());
+
+  void *output_buffer = NULL;
+  const int32_t *output_dims = NULL;
+  uint32_t dim_size = 0;
+  micro_engine->GetOutputData(0, &output_buffer, &output_dims, &dim_size);
+  LOG(INFO) << "EngineTest success, dim_size=" << dim_size;
+}
+
+TEST_F(EngineTest, OutputAllInfo) {
+  OutputAllInfo();
+}
+
+}  // namespace micro
diff --git a/micro/test/ccunit/micro/framework/graph_test.cc b/micro/test/ccunit/micro/framework/graph_test.cc
new file mode 100644
index 00000000..5d1acf54
--- /dev/null
+++ b/micro/test/ccunit/micro/framework/graph_test.cc
@@ -0,0 +1,113 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
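+
+// Walks the generated, serialized Graph (MICRO_MODEL_NAME::kGraphData) and
+// logs every op context with its input descriptors, plus the graph-level
+// input op indices and output info. When MACE_WRITE_MAGIC is defined, each
+// serialized object's magic code is also checked against its hard-coded
+// value.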
+ +#include +#include +#include +#include +#include + +#include "micro/base/logging.h" +#include "micro/framework/graph.h" +#include "micro/include/utils/macros.h" + +#ifndef MICRO_MODEL_NAME +#error Please specify model name in the command +#endif + +namespace micro { +namespace MICRO_MODEL_NAME { +extern uint8_t kGraphData[]; +} // namespace MICRO_MODEL_NAME + +namespace framework { + +#ifdef MACE_WRITE_MAGIC +#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) \ + MACE_ASSERT1(CheckMagic(OBJ_NAME, OBJ_NAME->GetMagic(), \ + OBJ_NAME->GetHardCodeMagic()), "CheckMagic failed.") + +bool CheckMagic(const Serialize *serial_obj, + SerialUint32 magic, SerialUint32 hard_code_magic) { + char str_magic[5] = {0}; + serial_obj->MagicToString(magic, str_magic); + bool succ = (magic == hard_code_magic); + if (!succ) { + char str_hc_magic[5] = {0}; + serial_obj->MagicToString(hard_code_magic, str_hc_magic); + LOG(INFO) << "The magic is invalid, " << "magic = " << str_magic + << ", hard_code_magic = " << str_hc_magic; + } else { + LOG(INFO) << "OK, The magic is " << str_magic; + } + return succ; +} +#else +#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) +#endif + +class GraphTest : public ::testing::Test { +}; + +void OutputOpContextInfo(const Graph *graph, const OpContext *op_context) { + LOG(INFO) << "op_idx is: " << op_context->op_idx(); + uint32_t input_info_size = op_context->input_info_size(); + LOG(INFO) << "input_info size size is: " << input_info_size; + for (uint32_t i = 0; i < input_info_size; ++i) { + const OpIOInfo *input_info = op_context->input_info(i); + graph->Uint2OpIOInfo(input_info); + LOG(INFO) << "op_def_idx_: " << input_info->op_def_idx_ + << ", output_idx_: " << input_info->output_idx_; + } +} + +void OutputGraphInfo(const Graph *graph) { + MACE_CHECK_MAGIC_CODE(graph); + uint32_t op_context_size = graph->op_context_size(); + LOG(INFO) << "op_context size is: " << op_context_size; + for (uint32_t i = 0; i < op_context_size; ++i) { + OutputOpContextInfo(graph, graph->op_context(i)); + } + + uint32_t input_op_idx_size = graph->input_op_idx_size(); + LOG(INFO) << "input_op_idx size is: " << input_op_idx_size; + for (uint32_t i = 0; i < input_op_idx_size; ++i) { + LOG(INFO) << "input_op_idx=" << graph->input_op_idx(i); + } + + uint32_t output_info_size = graph->output_info_size(); + LOG(INFO) << "output_info size is: " << output_info_size; + for (uint32_t i = 0; i < output_info_size; ++i) { + const OpIOInfo *output_info = graph->output_info(i); + graph->Uint2OpIOInfo(output_info); + LOG(INFO) << "op_def_idx_ is: " << output_info->op_def_idx_ + << ", output_idx_ is: " << output_info->output_idx_; + } +} + +void OutputAllInfo(const uint8_t *address) { + const Graph *graph = reinterpret_cast(address); + MACE_ASSERT1(graph != NULL, "reinterpret_cast failed."); + + OutputGraphInfo(graph); +} + + +TEST_F(GraphTest, OutputAllInfo) { + LOG(INFO) << "GraphTest start"; + OutputAllInfo(MICRO_MODEL_NAME::kGraphData); +} + +} // namespace framework +} // namespace micro diff --git a/micro/test/ccunit/micro/model/net_def_test.cc b/micro/test/ccunit/micro/model/net_def_test.cc new file mode 100644 index 00000000..d3f09d30 --- /dev/null +++ b/micro/test/ccunit/micro/model/net_def_test.cc @@ -0,0 +1,167 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "micro/base/logging.h" +#include "micro/include/utils/macros.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/model/operator_def.h" + +#ifndef MICRO_MODEL_NAME +#error Please specify model name in the command +#endif + +namespace micro { +namespace MICRO_MODEL_NAME { +extern uint8_t kNetDef[]; +} // namespace MICRO_MODEL_NAME + +namespace model { + +#ifdef MACE_WRITE_MAGIC +#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) \ + MACE_ASSERT1(CheckMagic(OBJ_NAME, OBJ_NAME->GetMagic(), \ + OBJ_NAME->GetHardCodeMagic()), "CheckMagic failed.") + +bool CheckMagic(const Serialize *serial_obj, + SerialUint32 magic, SerialUint32 hard_code_magic) { + char str_magic[5] = {0}; + serial_obj->MagicToString(magic, str_magic); + bool succ = (magic == hard_code_magic); + if (!succ) { + char str_hc_magic[5] = {0}; + serial_obj->MagicToString(hard_code_magic, str_hc_magic); + LOG(INFO) << "The magic is invalid, " << "magic = " << str_magic + << ", hard_code_magic = " << str_hc_magic; + } else { + LOG(INFO) << "OK, The magic is " << str_magic; + } + return succ; +} +#else +#define MACE_CHECK_MAGIC_CODE(OBJ_NAME) MACE_UNUSED(OBJ_NAME) +#endif + +class NetDefTest : public ::testing::Test { +}; + +void OutputArgumentInfo(const Argument *argument) { + MACE_CHECK_MAGIC_CODE(argument); + LOG(INFO) << "The argument name: " << argument->name(); +} + +void OutputOperatorInfo(const OperatorDef *op_def) { + MACE_CHECK_MAGIC_CODE(op_def); + LOG(INFO) << "The op_def name: " << op_def->name(); + uint32_t input_size = op_def->input_size(); + LOG(INFO) << "\tThe op_def input size: " << input_size; + for (uint32_t j = 0; j < input_size; ++j) { + LOG(INFO) << "\t\tThe input name: " << op_def->input(j); + } + auto output_size = op_def->output_size(); + LOG(INFO) << "\tThe op_def output size: " << output_size; + for (uint32_t k = 0; k < output_size; ++k) { + LOG(INFO) << "\t\tThe output name: " << op_def->output(k); + } + auto mem_offset_size = op_def->mem_offset_size(); + LOG(INFO) << "\tThe mem_offset size: " << mem_offset_size; + for (uint32_t k = 0; k < mem_offset_size; ++k) { + LOG(INFO) << "\t\tThe " << k << "th mem_offset: " << op_def->mem_offset(k); + } + auto arg_size = op_def->arg_size(); + LOG(INFO) << "\tThe arg size: " << arg_size; + for (uint32_t k = 0; k < arg_size; ++k) { + OutputArgumentInfo(op_def->arg(k)); + } +} + +void OutputTensorInfo(const ConstTensor *tensor) { + MACE_CHECK_MAGIC_CODE(tensor); + LOG(INFO) << "The tensor name: " << tensor->name(); + + auto dim_size = tensor->dim_size(); + LOG(INFO) << "\tThe tensor dim size: " << dim_size; + for (uint32_t i = 0; i < dim_size; ++i) { + LOG(INFO) << "\t\ttensor dim[" << i << "] = " << tensor->dim(i); + } + + auto float_data_size = tensor->float_data_size(); + LOG(INFO) << "\tThe tensor float_data size: " << float_data_size; + for (uint32_t i = 0; i < float_data_size; ++i) { + const float f_value = tensor->float_data(i); + LOG(INFO) << "\t\ttensor float_data[" << i << "] = " << f_value; + } + if (float_data_size > 0) { + MACE_ASSERT(false); + } +} 
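+
+// OutputNetDefInfo below walks the whole generated NetDef: every operator
+// (with its inputs, outputs, mem_offsets and arguments), every net-level
+// argument and const tensor, the net data type, and the input/output info
+// entries, logging each field via the helpers above.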
+ +void OutputNetDefInfo(const NetDef *net_def) { + MACE_CHECK_MAGIC_CODE(net_def); + auto op_size = net_def->op_size(); + LOG(INFO) << "op size is: " << op_size; + for (uint32_t i = 0; i < op_size; ++i) { + OutputOperatorInfo(net_def->op(i)); + } + + auto arg_size = net_def->arg_size(); + LOG(INFO) << "arg size is: " << arg_size; + auto arg_byte_size = sizeof(Argument); + LOG(INFO) << "arg byte size is: " << (int32_t) arg_byte_size; + for (uint32_t i = 0; i < arg_size; ++i) { + OutputArgumentInfo(net_def->arg(i)); + } + + auto tensor_size = net_def->tensor_size(); + LOG(INFO) << "tensor size is: " << tensor_size; + for (uint32_t i = 0; i < tensor_size; ++i) { + OutputTensorInfo(net_def->tensor(i)); + } + + auto data_type = net_def->data_type(); + LOG(INFO) << "data_type is: " << data_type; + + auto input_info_size = net_def->input_info_size(); + LOG(INFO) << "input_info size is: " << input_info_size; + for (uint32_t i = 0; i < input_info_size; ++i) { + MACE_CHECK_MAGIC_CODE(net_def->input_info(i)); + } + + auto output_info_size = net_def->output_info_size(); + LOG(INFO) << "output_info size is: " << output_info_size; + for (uint32_t i = 0; i < output_info_size; ++i) { + MACE_CHECK_MAGIC_CODE(net_def->output_info(i)); + } +} + +void OutputAllInfo(const uint8_t *address) { + const NetDef *net_def = reinterpret_cast(address); + MACE_ASSERT1(net_def != NULL, "reinterpret_cast failed."); + + OutputNetDefInfo(net_def); +} + +TEST_F(NetDefTest, OutputAllInfo) { + LOG(INFO) << "NetDefTest start"; + OutputAllInfo(MICRO_MODEL_NAME::kNetDef); +} + +} // namespace model +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/activation_test.cc b/micro/test/ccunit/micro/ops/activation_test.cc new file mode 100644 index 00000000..693f13cd --- /dev/null +++ b/micro/test/ccunit/micro/ops/activation_test.cc @@ -0,0 +1,249 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
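+
+// Unit tests for ActivationOp covering RELU, LEAKYRELU, RELUX, PRELU, TANH
+// and SIGMOID. Every case follows the pattern shared by these op tests: a
+// SubstituteOp stands in for the real operator context, carrying raw
+// input/output buffers and the activation arguments; the op is Init()-ed
+// against it, Run() once, and the result is compared to a hand-computed
+// expectation with ExpectTensorNear.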
+ +#include "gtest/gtest.h" +#include "micro/ops/activation.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ActivationOpTest : public ::testing::Test {}; +namespace { + +void TestSimpleRelu() { + float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "RELU"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void TestSimpleLeakyRelu() { + float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = + {-0.7, 7, -0.6, 6, -0.5, 5, -0.4, 4, -0.3, 3, -0.2, 2, -0.1, 1, 0, 0}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "LEAKYRELU"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddArg("leakyrelu_coefficient", 0.1f) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void TestUnalignedSimpleRelu() { + float input[6] = {-7, 7, -6, 6, -5, 5}; + int32_t input_dims[4] = {1, 3, 2, 1}; + + float output[6] = {0}; + int32_t output_dims[4] = {0}; + float expect[6] = {0, 7, 0, 6, 0, 5}; + int32_t expect_dims[4] = {1, 3, 2, 1}; + + const char activation_type[] = "RELU"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void TestSimpleRelux() { + float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "RELUX"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddArg("max_limit", 6) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, 
expect_dims, 4, 1e-4); +} + +void TestSimplePrelu() { + float input[16] = {-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + float alpha[2] = {2.0, 3.0}; + int32_t alpha_dims[1] = {2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = + {-14, 7, -12, 6, -10, -15, -8, -12, -6, 3, -4, 2, -2, -3, 0, 0}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "PRELU"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(alpha, alpha_dims, 1) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void TestSimpleTanh() { + float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = + {-0.99999834, 0.99999834, -0.99998771, 0.99998771, -0.9999092, 0.9999092, + -0.9993293, 0.9993293, -0.99505475, 0.99505475, -0.96402758, 0.96402758, + -0.76159416, 0.76159416, 0., 0.}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "TANH"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void TestSimpleSigmoid() { + float input[16] = {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}; + int32_t input_dims[4] = {2, 2, 2, 2}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = + {9.11051194e-04, 9.99088949e-01, 2.47262316e-03, 9.97527377e-01, + 6.69285092e-03, 9.93307149e-01, 1.79862100e-02, 9.82013790e-01, + 4.74258732e-02, 9.52574127e-01, 1.19202922e-01, 8.80797078e-01, + 2.68941421e-01, 7.31058579e-01, 5.00000000e-01, 5.00000000e-01}; + int32_t expect_dims[4] = {2, 2, 2, 2}; + + const char activation_type[] = "SIGMOID"; + const uint32_t arg_type_len = sizeof(activation_type); + + ActivationOp activation_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("activation", activation_type, arg_type_len) + .AddOutput(output, output_dims, 4); + + activation_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + activation_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +} // namespace + +TEST_F(ActivationOpTest, TestSimpleRelu) { + TestSimpleRelu(); +} + +TEST_F(ActivationOpTest, TestSimpleLeakyRelu) { + TestSimpleLeakyRelu(); +} + +TEST_F(ActivationOpTest, TestUnalignedSimpleRelu) { + TestUnalignedSimpleRelu(); +} + +TEST_F(ActivationOpTest, TestSimpleRelux) { + TestSimpleRelux(); +} + +TEST_F(ActivationOpTest, TestSimplePrelu) { + TestSimplePrelu(); +} + +TEST_F(ActivationOpTest, TestSimpleTanh) { + TestSimpleTanh(); +} + +TEST_F(ActivationOpTest, TestSimpleSigmoid) { + TestSimpleSigmoid(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git 
a/micro/test/ccunit/micro/ops/argmax_test.cc b/micro/test/ccunit/micro/ops/argmax_test.cc new file mode 100644 index 00000000..97e60cd4 --- /dev/null +++ b/micro/test/ccunit/micro/ops/argmax_test.cc @@ -0,0 +1,111 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/argmax.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ArgMaxOpTest : public ::testing::Test {}; + +namespace { + +void ArgMaxTest( + const float *input, const int32_t *input_dims, + const int32_t input_dim_size, + int32_t *output, int32_t *output_dims, const int32_t output_dim_size, + const int32_t *expect, const int32_t *expect_dims) { + ArgMaxOp argmax_op; + int32_t axis[] = {-1}; + int32_t axis_dims[1] = {1}; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddInput(axis, axis_dims, 0) + .AddOutput(output, output_dims, output_dim_size); + + argmax_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + argmax_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5, 1e-3); +} + +void ArgMaxTextVector() { + const float input[3] = {-3, -1, -2}; + const int32_t input_dims[1] = {3}; + + int32_t output[1] = {0}; + int32_t output_dims[1] = {0}; + + const int32_t expect[1] = {1}; + const int32_t expect_dims[1] = {0}; + + ArgMaxTest(input, input_dims, 1, + output, output_dims, 0, + expect, expect_dims); +} + +void ArgMaxTextMatrix() { + const float input[9] = {4, 5, 6, 9, 8, 7, 1, 2, 3}; + const int32_t input_dims[2] = {3, 3}; + + int32_t output[3] = {0}; + int32_t output_dims[1] = {0}; + + const int32_t expect[3] = {2, 0, 2}; + const int32_t expect_dims[1] = {3}; + + ArgMaxTest(input, input_dims, 1, + output, output_dims, 1, + expect, expect_dims); +} + +void ArgMaxTextHighRank() { + const float input[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + const int32_t input_dims[4] = {1, 2, 2, 3}; + + int32_t output[4] = {0}; + int32_t output_dims[3] = {0}; + + const int32_t expect[4] = {2, 2, 2, 2}; + const int32_t expect_dims[3] = {1, 2, 2}; + + ArgMaxTest(input, input_dims, 4, + output, output_dims, 3, + expect, expect_dims); +} + +} // namespace + +TEST_F(ArgMaxOpTest, Vector) { + ArgMaxTextVector(); +} + +TEST_F(ArgMaxOpTest, Matrix) { + ArgMaxTextMatrix(); +} + +TEST_F(ArgMaxOpTest, HighRank) { + ArgMaxTextHighRank(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/bias_add_test.cc b/micro/test/ccunit/micro/ops/bias_add_test.cc new file mode 100644 index 00000000..9f69c890 --- /dev/null +++ b/micro/test/ccunit/micro/ops/bias_add_test.cc @@ -0,0 +1,62 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/bias_add.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class BiasAddOpTest : public ::testing::Test {}; + +namespace { + +void BiasAddSimple() { + float input[12] = {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; + int32_t input_dims[4] = {1, 6, 2, 1}; + float bias[1] = {0.5f}; + int32_t bias_dims[1] = {1}; + + float output[12] = {0}; + int32_t output_dims[4] = {0}; + float expect[12] = + {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5}; + int32_t expect_dims[4] = {1, 6, 2, 1}; + + BiasAddOp bias_add_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddOutput(output, output_dims, 4); + + bias_add_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + bias_add_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +} // namespace + +TEST_F(BiasAddOpTest, BiasAddSimple) { + BiasAddSimple(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/eltwise_test.cc b/micro/test/ccunit/micro/ops/eltwise_test.cc new file mode 100644 index 00000000..4d0fe791 --- /dev/null +++ b/micro/test/ccunit/micro/ops/eltwise_test.cc @@ -0,0 +1,499 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
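+
+// Unit tests for EltwiseOp covering the scalar-scalar, tensor-scalar,
+// tensor-vector, tensor-tensor and general-broadcast paths. The optional
+// coeff argument is exercised as well: for SUM it scales the two inputs
+// (coeff {0.1, 0.1} turns x + x into 0.1x + 0.1x), while for CLIP it
+// supplies the [min, max] bounds.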
+ +#include "gtest/gtest.h" +#include "micro/ops/eltwise.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class EltwiseOpTest : public ::testing::Test {}; + +namespace { +template +void SimpleScalarScalar(eltwise::Type type, T input_value, + float x, const DstType expect_value) { + T input[1] = {input_value}; + int32_t input_dims[1] = {1}; + + T output[1] = {0}; + int32_t output_dims[1] = {0}; + DstType expect[1] = {expect_value}; + int32_t expect_dims[1] = {1}; + + EltwiseOp eltwise_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 1) + .AddArg("type", static_cast(type)) + .AddArg("scalar_input", x) + .AddOutput(output, output_dims, 1); + + eltwise_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + eltwise_op.Run(); + + ExpectTensorNear(output, output_dims, 1, expect, expect_dims, 1, 1e-5); +} + +template +void SimpleTensorScalar(eltwise::Type type, const T *input, + const int32_t *input_dims, const int32_t input_dim_size, + float x, const int32_t output_dim_size, + DstType *output, int32_t *output_dims, + const DstType *expect, const int32_t *expect_dims) { + EltwiseOp eltwise_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddArg("type", static_cast(type)) + .AddArg("scalar_input", x) + .AddOutput(output, output_dims, output_dim_size); + + eltwise_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + eltwise_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +template +void SimpleTensorScalarForSpecial(eltwise::Type type, const T *input, + float x, const DstType *expect) { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {1, 1, 2, 3}; + const int32_t output_dim_size = 4; + DstType output[6] = {0}; + int32_t output_dims[output_dim_size] = {0}; + const int32_t expect_dims[output_dim_size] = {1, 1, 2, 3}; + SimpleTensorScalar(type, input, input_dims, + input_dim_size, x, output_dim_size, + output, output_dims, + expect, expect_dims); +} + +void SimpleTensorScalar1() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {1, 1, 1, 1}; + const float input[] = {1}; + const int32_t output_dim_size = 4; + float output[1] = {0}; + int32_t output_dims[output_dim_size] = {0}; + const float expect[1] = {2}; + const int32_t expect_dims[output_dim_size] = {1, 1, 1, 1}; + SimpleTensorScalar(eltwise::SUM, input, input_dims, + input_dim_size, 1, output_dim_size, + output, output_dims, + expect, expect_dims); +} + +template +void SimpleTensorEltwise(eltwise::Type type, const T *input0, + const int32_t *input0_dims, + const int32_t input0_dim_size, + const T *input1, const int32_t *input1_dims, + const int32_t input1_dim_size, + DstType *output, int32_t *output_dims, + const int32_t output_dim_size, + const DstType *expect, const int32_t *expect_dims, + const float *coeff = NULL, + const uint32_t coeff_len = 0) { + EltwiseOp eltwise_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, input0_dim_size) + .AddArg("type", static_cast(type)) + .AddOutput(output, output_dims, output_dim_size); + if (input1 != NULL && input1_dims != NULL && input1_dim_size > 0) { + substitude_op.AddInput(input1, input1_dims, input1_dim_size); + } + + if (coeff != NULL && coeff_len > 0) { + substitude_op.AddRepeatArg("coeff", coeff, 
coeff_len); + } + + eltwise_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + eltwise_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +template +void SimpleTensorEltwise(eltwise::Type type, const T *input0, + const int32_t *input0_dims, const T *input1, + const int32_t *input1_dims, DstType *output, + const DstType *expect, const int32_t *expect_dims, + const float *coeff = NULL, + const uint32_t coeff_len = 0) { + int32_t output_dims[4] = {0}; + SimpleTensorEltwise(type, input0, input0_dims, 4, input1, input1_dims, 4, + output, output_dims, 4, expect, expect_dims, coeff, + coeff_len); +} +} // namespace + +TEST_F(EltwiseOpTest, SimpleScalarScalar) { + SimpleScalarScalar(eltwise::SUM, 1, 2, 3); + SimpleScalarScalar(eltwise::SUB, 1, 2, -1); + SimpleScalarScalar(eltwise::PROD, 1, 2, 2); + SimpleScalarScalar(eltwise::DIV, 1, 2, 0.5); + SimpleScalarScalar(eltwise::FLOOR_DIV, 1, 2, 0); + SimpleScalarScalar(eltwise::FLOOR_DIV, 1, -2, -1); + SimpleScalarScalar(eltwise::MIN, 1, 2, 1); + SimpleScalarScalar(eltwise::MAX, 1, 2, 2); + SimpleScalarScalar(eltwise::NEG, 1, 2, -1); + SimpleScalarScalar(eltwise::ABS, -1, 3, 1); + SimpleScalarScalar(eltwise::SIGN, -2, 3, -1); + SimpleScalarScalar(eltwise::EQUAL, 1, 3, 0); + SimpleScalarScalar(eltwise::EQUAL, 3, 3, 1); +} + +TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { + SimpleTensorScalar1(); + const float input[] = {1, 2, 3, 4, 5, 6}; + const float expect2[] = {0, 1, 2, 3, 4, 5}; + SimpleTensorScalarForSpecial(eltwise::SUB, input, 1, expect2); + + const float expect3[] = {2, 4, 6, 8, 10, 12}; + SimpleTensorScalarForSpecial(eltwise::PROD, input, 2, expect3); + + const float expect4[] = {1, 1, 1, 1, 1, 1}; + SimpleTensorScalarForSpecial(eltwise::MIN, input, 1, expect4); + + const float expect5[] = {3, 3, 3, 4, 5, 6}; + SimpleTensorScalarForSpecial(eltwise::MAX, input, 3, expect5); + + const float expect6[] = {-1, -2, -3, -4, -5, -6}; + SimpleTensorScalarForSpecial(eltwise::NEG, input, 3, expect6); + + const float expect7[] = {0, 1, 4, 9, 16, 25}; + SimpleTensorScalarForSpecial( + eltwise::SQR_DIFF, input, 1, expect7); + + const int32_t input_i[] = {1, 2, 3, 4, 5, 6}; + const int32_t expect8[] = {0, 0, 1, 0, 0, 0}; + SimpleTensorScalarForSpecial( + eltwise::EQUAL, input_i, 3, expect8); + + const float input9[] = {2, 4, 6, 8, 10, 12}; + const float expect9[] = {1, 2, 3, 4, 5, 6}; + SimpleTensorScalarForSpecial(eltwise::DIV, input9, 2, expect9); + + const float expect10[] = {0, 1, 2, 2, 3, 4}; + SimpleTensorScalarForSpecial( + eltwise::FLOOR_DIV, input9, 3, expect10); + + const float expect11[] = {-1, -2, -2, -3, -4, -4}; + SimpleTensorScalarForSpecial( + eltwise::FLOOR_DIV, input9, -3, expect11); + + const float input12[] = {-1, -2, -3, -4, -5, -6}; + const float expect12[] = {1, 2, 3, 4, 5, 6}; + SimpleTensorScalarForSpecial( + eltwise::ABS, input12, 3, expect12); + + const float input13[] = {1, 2, -3, 0, -5, -6}; + const float expect13[] = {1, 1, -1, 0, -1, -1}; + SimpleTensorScalarForSpecial( + eltwise::SIGN, input13, 3, expect13); +} + +TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { + const int32_t dims1123[] = {1, 1, 2, 3}; + const int32_t dims1113[] = {1, 1, 1, 3}; + const int32_t dims1215[] = {1, 2, 1, 5}; + const int32_t dims1115[] = {1, 1, 1, 5}; + const int32_t dims1213[] = {1, 2, 1, 3}; + const int32_t dims3[] = {3}; + const int32_t dims5[] = {5}; + + float output6[6] = {0}; + float output10[10] = {0}; + int32_t output6_i[6] = {0}; + + int32_t output_dims4[4] = {0}; 
+ + const float input0_0[] = {1, 2, 3, 4, 5, 6}; + const float input1_0[] = {1, 2, 3}; + const float expect_0[] = {2, 4, 6, 5, 7, 9}; + SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input1_0, + dims1113, output6, expect_0, dims1123); + + const float input0_1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + const float input1_1[] = {1, 2, 3, 4, 5}; + const float expect_1[] = {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}; + SimpleTensorEltwise(eltwise::SUB, input0_1, dims1215, input1_1, + dims1115, output10, expect_1, dims1215); + + const float expect_2[] = {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}; + SimpleTensorEltwise(eltwise::SUB, input1_1, dims1115, input0_1, + dims1215, output10, expect_2, dims1215); + + const float expect_3[] = {1, 4, 9, 4, 10, 18}; + SimpleTensorEltwise(eltwise::PROD, input1_0, dims1113, input0_0, + dims1213, output6, expect_3, dims1213); + + const float input1_4[] = {1, 1, 1, 1, 5}; + const float expect_4[] = {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}; + SimpleTensorEltwise(eltwise::DIV, input0_1, dims1215, input1_4, + dims1115, output10, expect_4, dims1215); + + const float input0_5[] = {1, 1, 1, 2, 4}; + const float input1_5[] = {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}; + const float expect_5[] = {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}; + SimpleTensorEltwise(eltwise::DIV, input0_5, dims1115, input1_5, + dims1215, output10, expect_5, dims1215); + + const float input1_6[] = {2, 2, 2, 2, 3}; + const float expect_6[] = {0, 1, 1, 2, 1, 3, 3, 4, 4, 3}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, input1_6, + dims1115, output10, expect_6, dims1215); + + const float input1_7[] = {-2, -2, -2, -2, -3}; + const float expect_7[] = {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, input1_7, + dims1115, output10, expect_7, dims1215); + + const float input1_8[] = {2, 2, 2, 3, 3, 2, 2, 2, 2, 2}; + const float expect_8[] = {0, 0, 0, 0, 1, 0, 0, 0, 1, 2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1115, input1_8, + dims1215, output10, expect_8, dims1215); + + const float input1_9[] = {-2, -2, -2, -3, -3, -2, -2, -2, -2, -2}; + const float expect_9[] = {-1, -1, -1, -1, -2, -1, -1, -1, -1, -2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1115, input1_9, + dims1215, output10, expect_9, dims1215); + + const float expect_10[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; + SimpleTensorEltwise(eltwise::MIN, input1_1, dims1115, input0_1, + dims1215, output10, expect_10, dims1215); + + const float expect_11[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + SimpleTensorEltwise(eltwise::MAX, input0_1, dims1215, input1_1, + dims1115, output10, expect_11, dims1215); + + const float expect_12[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}; + SimpleTensorEltwise(eltwise::SQR_DIFF, input1_1, dims1115, input0_1, + dims1215, output10, expect_12, dims1215); + + const int32_t input0_13[] = {1, 2, 3, 4, 5, 6}; + const int32_t input1_13[] = {1, 2, 3}; + const int32_t expect_13[] = {1, 1, 1, 0, 0, 0}; + SimpleTensorEltwise(eltwise::EQUAL, input0_13, dims1123, input1_13, + dims1113, output6_i, expect_13, dims1123); + + const float expect_14[] = {2, 4, 6, 5, 7, 9}; + SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, + 4, input1_0, dims3, 1, output6, + output_dims4, 4, expect_14, dims1123); + + const float expect_15[] = {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}; + SimpleTensorEltwise(eltwise::SUB, input0_1, dims1215, + 4, input1_1, dims5, 1, output10, + output_dims4, 4, expect_15, dims1215); + + const float expect_16[] = {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}; + SimpleTensorEltwise(eltwise::SUB, 
input1_1, dims5, + 1, input0_1, dims1215, 4, output10, + output_dims4, 4, expect_16, dims1215); + + const float expect_17[] = {1, 4, 9, 4, 10, 18}; + SimpleTensorEltwise(eltwise::PROD, input1_0, dims3, + 1, input0_0, dims1213, 4, output6, + output_dims4, 4, expect_17, dims1213); + + const float expect_18[] = {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}; + SimpleTensorEltwise(eltwise::DIV, input0_1, dims1215, + 4, input1_4, dims5, 1, output10, + output_dims4, 4, expect_18, dims1215); + + const float expect_19[] = {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}; + SimpleTensorEltwise(eltwise::DIV, input0_5, dims5, + 1, input1_5, dims1215, 4, output10, + output_dims4, 4, expect_19, dims1215); + + const float expect_20[] = {0, 1, 1, 2, 1, 3, 3, 4, 4, 3}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, + 4, input1_6, dims5, 1, output10, + output_dims4, 4, expect_20, dims1215); + + const float expect_21[] = {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_1, dims1215, + 4, input1_7, dims5, 1, output10, output_dims4, + 4, expect_21, dims1215); + + const float expect_22[] = {0, 0, 0, 0, 1, 0, 0, 0, 1, 2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims5, 1, input1_8, + dims1215, 4, output10, output_dims4, + 4, expect_22, dims1215); + + const float expect_23[] = {-1, -1, -1, -1, -2, -1, -1, -1, -1, -2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims5, 1, input1_9, + dims1215, 4, output10, output_dims4, + 4, expect_23, dims1215); + + const float expect_24[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; + SimpleTensorEltwise(eltwise::MIN, input1_1, dims5, 1, input0_1, + dims1215, 4, output10, output_dims4, + 4, expect_24, dims1215); + + const float expect_25[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + SimpleTensorEltwise(eltwise::MAX, input0_1, dims1215, 4, input1_1, + dims5, 1, output10, output_dims4, 4, + expect_25, dims1215); + + const float expect_26[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}; + SimpleTensorEltwise(eltwise::SQR_DIFF, input1_1, dims5, 1, input0_1, + dims1215, 4, output10, output_dims4, 4, + expect_26, dims1215); + + const int32_t expect_27[] = {1, 1, 1, 0, 0, 0}; + SimpleTensorEltwise(eltwise::EQUAL, input0_13, dims1123, 4, input1_13, + dims3, 1, output6_i, output_dims4, 4, + expect_27, dims1123); +} + +TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { + const int32_t dims1123[] = {1, 1, 2, 3}; + const int32_t dims1215[] = {1, 2, 1, 5}; + const int32_t dims1115[] = {1, 1, 1, 5}; + const int32_t dims1213[] = {1, 2, 1, 3}; + + float output6[6] = {0}; + float output10[10] = {0}; + int32_t output6_i[6] = {0}; + + int32_t output_dims4[4] = {0}; + + const float input0_0[] = {1, 2, 3, 4, 5, 6}; + const float expect_0[] = {2, 4, 6, 8, 10, 12}; + SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input0_0, + dims1123, output6, expect_0, dims1123); + + const float expect_1[] = {0.2, 0.4, 0.6, 0.8, 1, 1.2}; + const float coeff_1[] = {0.1, 0.1}; + SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input0_0, + dims1123, output6, expect_1, dims1123, coeff_1, + sizeof(coeff_1)/ sizeof(float)); + + const float input0_2[] = {1, 2, 3, 4, 5}; + const float expect_2[] = {0, 0, 0, 0, 0}; + SimpleTensorEltwise(eltwise::SUB, input0_2, dims1115, input0_2, + dims1115, output6, expect_2, dims1115); + + const float expect_3[] = {1, 4, 9, 16, 25, 36}; + SimpleTensorEltwise(eltwise::PROD, input0_0, dims1213, input0_0, + dims1213, output6, expect_3, dims1213); + + const float expect_4[] = {1, 1, 1, 1, 1, 1}; + SimpleTensorEltwise(eltwise::DIV, input0_0, dims1213, input0_0, + 
dims1213, output6, expect_4, dims1213); + + const float input0_5[] = {2, 3, 4, 5, 6, 7}; + const float expect_5[] = {2, 1, 1, 1, 1, 1}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_5, dims1213, input0_0, + dims1213, output6, expect_5, dims1213); + + const float input0_6[] = {-2, -3, -4, -5, -6, -7}; + const float expect_6[] = {-2, -2, -2, -2, -2, -2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_6, dims1213, input0_0, + dims1213, output6, expect_6, dims1213); + + const float input0_7[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; + const float input1_7[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + const float expect_7[] = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; + SimpleTensorEltwise(eltwise::MIN, input0_7, dims1215, input1_7, + dims1215, output10, expect_7, dims1215); + + const float expect_8[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + SimpleTensorEltwise(eltwise::MAX, input1_7, dims1215, input0_7, + dims1215, output10, expect_8, dims1215); + + const float expect_9[] = {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}; + SimpleTensorEltwise(eltwise::SQR_DIFF, input0_7, dims1215, input1_7, + dims1215, output10, expect_9, dims1215); + + const int input0_10[] = {1, 2, 3, 4, 5, 6}; + const int expect_10[] = {1, 1, 1, 1, 1, 1}; + SimpleTensorEltwise(eltwise::EQUAL, input0_10, dims1123, input0_10, + dims1123, output6_i, expect_10, dims1123); + + const float expect_11[] = {2, 2, 3, 3, 3, 2, 2, 3, 3, 3}; + const float coeff_11[] = {2.0f, 3.0f}; + SimpleTensorEltwise( + eltwise::CLIP, input0_7, dims1215, + 4, NULL, NULL, 0, output10, output_dims4, 4, expect_11, dims1215, + coeff_11, sizeof(coeff_11) / sizeof(float)); +} + +TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { + const int32_t dims1123[] = {1, 1, 2, 3}; + const int32_t dims1121[] = {1, 1, 2, 1}; + + float output[10] = {0}; + const float input0_0[] = {1, 2, 3, 4, 5, 6}; + const float input1_0[] = {1, 2}; + const float expect_0[] = {2, 3, 4, 6, 7, 8}; + SimpleTensorEltwise(eltwise::SUM, input0_0, dims1123, input1_0, + dims1121, output, expect_0, dims1123); + + const float expect_1[] = {0, 1, 2, 2, 3, 4}; + SimpleTensorEltwise(eltwise::SUB, input0_0, dims1123, input1_0, + dims1121, output, expect_1, dims1123); + + const float expect_2[] = {1, 2, 3, 8, 10, 12}; + SimpleTensorEltwise(eltwise::PROD, input0_0, dims1123, input1_0, + dims1121, output, expect_2, dims1123); + + const float expect_3[] = {1, 2, 3, 2, 2.5, 3}; + SimpleTensorEltwise(eltwise::DIV, input0_0, dims1123, input1_0, + dims1121, output, expect_3, dims1123); + + const float input1_4[] = {2, 3}; + const float expect_4[] = {0, 1, 1, 1, 1, 2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_0, dims1123, input1_4, + dims1121, output, expect_4, dims1123); + + const float input1_5[] = {-2, -3}; + const float expect_5[] = {-1, -1, -2, -2, -2, -2}; + SimpleTensorEltwise(eltwise::FLOOR_DIV, input0_0, dims1123, input1_5, + dims1121, output, expect_5, dims1123); + + const float expect_6[] = {1, 1, 1, 2, 2, 2}; + SimpleTensorEltwise(eltwise::MIN, input0_0, dims1123, input1_0, + dims1121, output, expect_6, dims1123); + + const float expect_7[] = {1, 2, 3, 4, 5, 6}; + SimpleTensorEltwise(eltwise::MAX, input0_0, dims1123, input1_0, + dims1121, output, expect_7, dims1123); + + const float expect_8[] = {0, 1, 4, 4, 9, 16}; + SimpleTensorEltwise(eltwise::SQR_DIFF, input0_0, dims1123, input1_0, + dims1121, output, expect_8, dims1123); + + const int32_t input0_9[] = {1, 2, 3, 4, 5, 6}; + const int32_t input1_9[] = {1, 2}; + const int32_t expect_9[] = {1, 0, 0, 0, 0, 0}; + int32_t output_9[6] = {0}; + 
SimpleTensorEltwise(eltwise::EQUAL, input0_9, dims1123, input1_9, + dims1121, output_9, expect_9, dims1123); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/expand_dims_test.cc b/micro/test/ccunit/micro/ops/expand_dims_test.cc new file mode 100644 index 00000000..fe92caee --- /dev/null +++ b/micro/test/ccunit/micro/ops/expand_dims_test.cc @@ -0,0 +1,82 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/expand_dims.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ExpandDimsOpTest : public ::testing::Test {}; + +namespace { + +void ExpandDimsSimpleA() { + MACE_DEFINE_RANDOM_INPUT(float, input, 6); + int32_t input_dims[3] = {3, 2, 1}; + + float output[6] = {0}; + int32_t output_dims[4] = {0}; + float *expect = input; + int32_t expect_dims[4] = {3, 1, 2, 1}; + + ExpandDimsOp expand_dims_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 3) + .AddArg("axis", 1) + .AddOutput(output, output_dims, 4); + + expand_dims_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + expand_dims_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +void ExpandDimsSimpleB() { + MACE_DEFINE_RANDOM_INPUT(float, input, 6); + int32_t input_dims[3] = {1, 2, 3}; + + float output[6] = {0}; + int32_t output_dims[4] = {0}; + float *expect = input; + int32_t expect_dims[4] = {1, 2, 3, 1}; + + ExpandDimsOp expand_dims_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 3) + .AddArg("axis", -1) + .AddOutput(output, output_dims, 4); + + expand_dims_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + expand_dims_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +} // namespace + +TEST_F(ExpandDimsOpTest, ExpandDimsSimple) { + ExpandDimsSimpleA(); + ExpandDimsSimpleB(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/matmul_test.cc b/micro/test/ccunit/micro/ops/matmul_test.cc new file mode 100644 index 00000000..4661352a --- /dev/null +++ b/micro/test/ccunit/micro/ops/matmul_test.cc @@ -0,0 +1,99 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/matmul.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class MatMulOpTest : public ::testing::Test {}; + +namespace { + +void Simple( + const float *input0, const int32_t *input0_dims, + const int32_t input0_dim_size, + const float *input1, const int32_t *input1_dims, + const int32_t input1_dim_size, + float *output, int32_t *output_dims, const int32_t output_dim_size, + const float *expect, const int32_t *expect_dims) { + MatMulOp mat_mul_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, input0_dim_size) + .AddInput(input1, input1_dims, input1_dim_size) + .AddOutput(output, output_dims, output_dim_size); + + mat_mul_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + mat_mul_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +void Simple1() { + const float input0[6] = {1, 2, 3, 4, 5, 6}; + const int32_t input0_dim_size = 3; + const int32_t input0_dims[input0_dim_size] = {1, 2, 3}; + const float input1[6] = {1, 2, 3, 4, 5, 6}; + const int32_t input1_dim_size = 3; + const int32_t input1_dims[input1_dim_size] = {1, 3, 2}; + float output[6] = {0}; + const int32_t output_dim_size = 3; + int32_t output_dims[output_dim_size] = {0}; + const float expect[4] = {22, 28, 49, 64}; + const int32_t expect_dims[output_dim_size] = {1, 2, 2}; + Simple(input0, input0_dims, input0_dim_size, + input1, input1_dims, input1_dim_size, + output, output_dims, output_dim_size, + expect, expect_dims); +} + +void Simple2() { + const float input0[25] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}; + const int32_t input0_dim_size = 3; + const int32_t input0_dims[input0_dim_size] = {1, 5, 5}; + const float input1[25] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}; + const int32_t input1_dim_size = 3; + const int32_t input1_dims[input1_dim_size] = {1, 5, 5}; + float output[25] = {0}; + const int32_t output_dim_size = 3; + int32_t output_dims[output_dim_size] = {0}; + const float expect[25] = {215, 230, 245, 260, 275, 490, 530, 570, 610, + 650, 765, 830, 895, 960, 1025, 1040, 1130, 1220, + 1310, 1400, 1315, 1430, 1545, 1660, 1775}; + const int32_t expect_dims[output_dim_size] = {1, 5, 5}; + Simple(input0, input0_dims, input0_dim_size, + input1, input1_dims, input1_dim_size, + output, output_dims, output_dim_size, + expect, expect_dims); +} + +} // namespace + +TEST_F(MatMulOpTest, SimpleCPU) { + Simple1(); + Simple2(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/batch_norm_test.cc b/micro/test/ccunit/micro/ops/nhwc/batch_norm_test.cc new file mode 100644 index 00000000..2ed283b8 --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/batch_norm_test.cc @@ -0,0 +1,72 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/batch_norm.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class BatchNormOpTest : public ::testing::Test {}; + +namespace { + +void TestBatchNormOp() { + float input[12] = {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; + int32_t input_dims[4] = {1, 6, 2, 1}; + float scale[1] = {4.0f}; + int32_t scale_dims[1] = {1}; + float offset[1] = {2.0f}; + int32_t offset_dims[1] = {1}; + float mean[1] = {10}; + int32_t mean_dims[1] = {1}; + float var[1] = {11.67f}; + int32_t var_dims[1] = {1}; + + float output[12] = {0}; + int32_t output_dims[4] = {0}; + float expect[12] = {-3.8543, -3.8543, -1.5125, -1.5125, 0.8291, 0.8291, + 3.1708, 3.1708, 5.5125, 5.5125, 7.8543, 7.8543}; + int32_t expect_dims[4] = {1, 6, 2, 1}; + + BatchNormOp batch_norm_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(scale, scale_dims, 1) + .AddInput(offset, offset_dims, 1) + .AddInput(mean, mean_dims, 1) + .AddInput(var, var_dims, 1) + .AddArg("epsilon", 1e-3) + .AddOutput(output, output_dims, 4); + + batch_norm_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + batch_norm_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-4); +} + +} // namespace + +TEST_F(BatchNormOpTest, TestBatchNorm) { + TestBatchNormOp(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/conv_2d_opt_test.cc b/micro/test/ccunit/micro/ops/nhwc/conv_2d_opt_test.cc new file mode 100644 index 00000000..63b6a4e8 --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/conv_2d_opt_test.cc @@ -0,0 +1,264 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
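+// +// Unit tests for the size-optimized Conv2d kernels (Conv2dC2S4Op, Conv2dC3S4Op and +// Conv2dC4S4Op), covering SAME padding, unequal strides, stride 2 and 1x1 convolution.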
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/conv_2d_c2_s4.h" +#include "micro/ops/nhwc/conv_2d_c3_s4.h" +#include "micro/ops/nhwc/conv_2d_c4_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class Conv2dOptOpTest : public ::testing::Test {}; + +namespace { + +void TestNHWCMulti3x3SAME() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[72] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {4, 3, 3, 2}; + float bias[4] = {0.1f, 0.1f, 0.1f, 0.1f}; + int32_t bias_dims[1] = {4}; + + float output[36] = {0}; + int32_t output_dims[4] = {0}; + float expect[36] = {8.1f, 8.1f, 8.1f, 8.1f, + 12.1f, 12.1f, 12.1f, 12.1f, + 8.1f, 8.1f, 8.1f, 8.1f, + 12.1f, 12.1f, 12.1f, 12.1f, + 18.1f, 18.1f, 18.1f, 18.1f, + 12.1f, 12.1f, 12.1f, 12.1f, + 8.1f, 8.1f, 8.1f, 8.1f, + 12.1f, 12.1f, 12.1f, 12.1f, + 8.1f, 8.1f, 8.1f, 8.1f}; + int32_t expect_dims[4] = {1, 3, 3, 4}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dC4S4Op conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCMulti3x3NeqStride() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[36] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {2, 3, 3, 2}; + float bias[2] = {0.1f, 0.1f}; + int32_t bias_dims[1] = {2}; + + float output[12] = {0}; + int32_t output_dims[4] = {0}; + float expect[12] = { + 8.1f, 8.1f, 8.1f, 8.1f, 12.1f, 12.1f, + 12.1f, 12.1f, 8.1f, 8.1f, 8.1f, 8.1f + }; + int32_t expect_dims[4] = {1, 3, 2, 2}; + + const int32_t strides[] = {1, 2}; + const int32_t dilations[] = {1, 1}; + + Conv2dC2S4Op conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWC3Multi3x3NeqStride() { + 
float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[54] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {3, 3, 3, 2}; + float bias[3] = {0.1f, 0.1f, 0.1f}; + int32_t bias_dims[1] = {3}; + + float output[12] = {0}; + int32_t output_dims[4] = {0}; + float expect[18] = {8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 12.1f, 12.1f, 12.1f, + 12.1f, 12.1f, 12.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f, 8.1f}; + int32_t expect_dims[4] = {1, 3, 2, 3}; + + const int32_t strides[] = {1, 2}; + const int32_t dilations[] = {1, 1}; + + Conv2dC3S4Op conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCCombined3x3() { + float input[50] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 5, 5, 2}; + float filter[36] = + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; + int32_t filter_dims[4] = {2, 3, 3, 2}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[18] = {0}; + int32_t output_dims[4] = {0}; + float expect[18] = {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, + 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}; + int32_t expect_dims[4] = {1, 3, 3, 2}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + + Conv2dC2S4Op conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestConv1x1() { + float input[150] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 10, 5}; + float filter[10] = + {1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}; + int32_t filter_dims[4] = {2, 1, 1, 5}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[60] = {0}; + int32_t output_dims[4] = {0}; + float expect[60] = { + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f}; + int32_t expect_dims[4] = {1, 3, 10, 2}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dC2S4Op conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +} // namespace + +TEST_F(Conv2dOptOpTest, TestConv2dMultiSAME) { + TestNHWCMulti3x3SAME(); +} + +TEST_F(Conv2dOptOpTest, CPUStride2) { + TestNHWCCombined3x3(); +} + +TEST_F(Conv2dOptOpTest, CPUConv1x1) { + TestConv1x1(); +} + +TEST_F(Conv2dOptOpTest, TestNHWC3Multi3x3NeqStride) { + TestNHWCMulti3x3NeqStride(); + TestNHWC3Multi3x3NeqStride(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc new file mode 100644 index 00000000..067420dc --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc @@ -0,0 +1,320 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
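+// +// Unit tests for the reference Conv2dRefOp, covering VALID and SAME 3x3 padding, +// unequal strides, convolution without bias, stride 2, fused RELU activation and +// 1x1 convolution.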
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/conv_2d_ref.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class Conv2dOpTest : public ::testing::Test {}; + +namespace { + +void TestNHWCSimple3x3VALID() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {1, 3, 3, 2}; + float bias[1] = {0.1f}; + int32_t bias_dims[1] = {1}; + + float output[1] = {0}; + int32_t output_dims[4] = {0}; + float expect[1] = {18.1f}; + int32_t expect_dims[4] = {1, 1, 1, 1}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCSimple3x3SAME() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {1, 3, 3, 2}; + float bias[1] = {0.1f}; + int32_t bias_dims[1] = {1}; + + float output[9] = {0}; + int32_t output_dims[4] = {0}; + float expect[9] = {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f}; + int32_t expect_dims[4] = {1, 3, 3, 1}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCSimple3x3NeqStride() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {1, 3, 3, 2}; + float bias[1] = {0.1f}; + int32_t bias_dims[1] = {1}; + + float output[6] = {0}; + int32_t output_dims[4] = {0}; + float expect[6] = {8.1f, 8.1f, 12.1f, 12.1f, 8.1f, 8.1f}; + int32_t expect_dims[4] = {1, 3, 2, 1}; + + const int32_t strides[] = {1, 2}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + 
.AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCSimple3x3WithoutBias() { + float input[18] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {1, 3, 3, 2}; + + float output[1] = {0}; + int32_t output_dims[4] = {0}; + float expect[1] = {18.0f}; + int32_t expect_dims[4] = {1, 1, 1, 1}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestNHWCCombined3x3() { + float input[50] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 5, 5, 2}; + float filter[36] = + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; + int32_t filter_dims[4] = {2, 3, 3, 2}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[18] = {0}; + int32_t output_dims[4] = {0}; + float expect[18] = {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, + 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}; + int32_t expect_dims[4] = {1, 3, 3, 2}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestFusedNHWCSimple3x3VALID(bool need_bias) { + float input[18] = + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[18] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + int32_t filter_dims[4] = {1, 3, 3, 2}; + float bias[1] = {-0.1f}; + int32_t bias_dims[1] = {1}; + + float output[1] = {0}; + int32_t output_dims[4] = {0}; + float expect[1] = {0.0f}; + int32_t expect_dims[4] = {1, 1, 1, 1}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + const char activation[] = "RELU"; + + 
Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddRepeatArg("activation", activation, sizeof(activation)) + .AddOutput(output, output_dims, 4); + if (need_bias) { + substitude_op.AddInput(bias, bias_dims, 1); + } + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestConv1x1() { + float input[150] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + int32_t input_dims[4] = {1, 3, 10, 5}; + float filter[10] = + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}; + int32_t filter_dims[4] = {2, 1, 1, 5}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[60] = {0}; + int32_t output_dims[4] = {0}; + float expect[60] = { + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f}; + int32_t expect_dims[4] = {1, 3, 10, 2}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + Conv2dRefOp conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +} // namespace + +TEST_F(Conv2dOpTest, TestConv2dVALID) { + TestNHWCSimple3x3VALID(); +} + +TEST_F(Conv2dOpTest, TestConv2dSAME) { + TestNHWCSimple3x3SAME(); +} + +TEST_F(Conv2dOpTest, NotEqualStrideSimple) { + TestNHWCSimple3x3NeqStride(); +} + +TEST_F(Conv2dOpTest, CPUWithoutBias) { + TestNHWCSimple3x3WithoutBias(); +} + +TEST_F(Conv2dOpTest, CPUStride2) { + TestNHWCCombined3x3(); +} + +TEST_F(Conv2dOpTest, FusedCPUSimple) { + TestFusedNHWCSimple3x3VALID(true); + TestFusedNHWCSimple3x3VALID(false); +} + +TEST_F(Conv2dOpTest, CPUConv1x1) { + TestConv1x1(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_opt_test.cc b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_opt_test.cc new file mode 100644 index 00000000..1472c05c --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_opt_test.cc @@ -0,0 +1,251 @@ +// Copyright 2018 The MACE Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h" +#include "micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class DepthwiseConv2dOptOpTest : public ::testing::Test {}; + +namespace { +void SimpleValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, + 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[8] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {1, 2, 2, 2}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[8] = {0}; + int32_t output_dims[4] = {0}; + float expect[8] = {37.1f, 148.2f, 47.1f, 188.2f, + 67.1f, 268.2f, 77.1f, 308.2f}; + int32_t expect_dims[4] = {1, 2, 2, 2}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dKB1S4Op depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void MultiKB2ValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[16] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {2, 2, 2, 2}; + float bias[4] = {0.1f, 0.1f, 0.2f, 0.2f}; + int32_t bias_dims[1] = {4}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = {37.1f, 37.1f, 148.2f, 148.2f, + 47.1f, 47.1f, 188.2f, 188.2f, + 67.1f, 67.1f, 268.2f, 268.2f, + 77.1f, 77.1f, 308.2f, 308.2f}; + int32_t expect_dims[4] = {1, 2, 2, 4}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dKB2S4Op depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, 
output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void MultiKB3ValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[24] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {3, 2, 2, 2}; + float bias[6] = {0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f}; + int32_t bias_dims[1] = {6}; + + float output[24] = {0}; + int32_t output_dims[4] = {0}; + float expect[24] = {37.1f, 37.1f, 37.1f, 148.2f, 148.2f, 148.2f, + 47.1f, 47.1f, 47.1f, 188.2f, 188.2f, 188.2f, + 67.1f, 67.1f, 67.1f, 268.2f, 268.2f, 268.2f, + 77.1f, 77.1f, 77.1f, 308.2f, 308.2f, 308.2f}; + int32_t expect_dims[4] = {1, 2, 2, 6}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dKB3S4Op depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void MultiKB4ValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[32] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {4, 2, 2, 2}; + float bias[8] = {0.1f, 0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f, 0.2f}; + int32_t bias_dims[1] = {8}; + + float output[32] = {0}; + int32_t output_dims[4] = {0}; + float expect[32] = { + 37.1f, 37.1f, 37.1f, 37.1f, 148.2f, 148.2f, 148.2f, 148.2f, + 47.1f, 47.1f, 47.1f, 47.1f, 188.2f, 188.2f, 188.2f, 188.2f, + 67.1f, 67.1f, 67.1f, 67.1f, 268.2f, 268.2f, 268.2f, 268.2f, + 77.1f, 77.1f, 77.1f, 77.1f, 308.2f, 308.2f, 308.2f, 308.2f}; + int32_t expect_dims[4] = {1, 2, 2, 8}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dKB4S4Op depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void MultiKB5ValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[40] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {5, 2, 2, 2}; + float 
bias[10] = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f}; + int32_t bias_dims[1] = {10}; + + float output[40] = {0}; + int32_t output_dims[4] = {0}; + float expect[40] = { + 37.1f, 37.1f, 37.1f, 37.1f, 37.1f, + 148.2f, 148.2f, 148.2f, 148.2f, 148.2f, + 47.1f, 47.1f, 47.1f, 47.1f, 47.1f, + 188.2f, 188.2f, 188.2f, 188.2f, 188.2f, + 67.1f, 67.1f, 67.1f, 67.1f, 67.1f, + 268.2f, 268.2f, 268.2f, 268.2f, 268.2f, + 77.1f, 77.1f, 77.1f, 77.1f, 77.1f, + 308.2f, 308.2f, 308.2f, 308.2f, 308.2f + }; + int32_t expect_dims[4] = {1, 2, 2, 10}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dKB4S4Op depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +} // namespace + +TEST_F(DepthwiseConv2dOptOpTest, MultiKB1CPU) { + SimpleValidTest(); +} + +TEST_F(DepthwiseConv2dOptOpTest, MultiKB2CPU) { + MultiKB2ValidTest(); +} + +TEST_F(DepthwiseConv2dOptOpTest, MultiKB3CPU) { + MultiKB3ValidTest(); +} + +TEST_F(DepthwiseConv2dOptOpTest, MultiKB4CPU) { + MultiKB4ValidTest(); +} + +TEST_F(DepthwiseConv2dOptOpTest, MultiKB5CPU) { + MultiKB5ValidTest(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc new file mode 100644 index 00000000..3583f4c4 --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc @@ -0,0 +1,112 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
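+// +// Unit tests for the reference DepthwiseConv2dRefOp, covering single- and +// double-multiplier 2x2 filters with VALID padding.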
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/depthwise_conv_2d_ref.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class DepthwiseConv2dOpTest : public ::testing::Test {}; + +namespace { +void SimpleValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[8] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {1, 2, 2, 2}; + float bias[2] = {0.1f, 0.2f}; + int32_t bias_dims[1] = {2}; + + float output[8] = {0}; + int32_t output_dims[4] = {0}; + float expect[8] = {37.1f, 148.2f, 47.1f, 188.2f, + 67.1f, 268.2f, 77.1f, 308.2f}; + int32_t expect_dims[4] = {1, 2, 2, 2}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dRefOp depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, + 4, expect, expect_dims, 4, 1e-5); +} + +void MultiC2ValidTest() { + float input[18] = {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}; + int32_t input_dims[4] = {1, 3, 3, 2}; + float filter[16] = {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f, + 1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}; + int32_t filter_dims[4] = {2, 2, 2, 2}; + float bias[4] = {0.1f, 0.1f, 0.2f, 0.2f}; + int32_t bias_dims[1] = {4}; + + float output[16] = {0}; + int32_t output_dims[4] = {0}; + float expect[16] = { + 37.1f, 37.1f, 148.2f, 148.2f, 47.1f, 47.1f, 188.2f, 188.2f, + 67.1f, 67.1f, 268.2f, 268.2f, 77.1f, 77.1f, 308.2f, 308.2f + }; + int32_t expect_dims[4] = {1, 2, 2, 4}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {1, 1}; + + DepthwiseConv2dRefOp depthwise_conv_2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddInput(filter, filter_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + depthwise_conv_2d_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + depthwise_conv_2d_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +} // namespace + +TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { + SimpleValidTest(); +} + +TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) { + MultiC2ValidTest(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc new file mode 100644 index 00000000..d7f7db32 --- /dev/null +++ b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc @@ -0,0 +1,208 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/pooling_ref.h" +#include "micro/ops/nhwc/pooling_s4.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class PoolingOpTest : public ::testing::Test {}; + +namespace { + +void TestPoolingOpValidMax() { + float input[32] = { + 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}; + int32_t input_dims[4] = {1, 4, 4, 2}; + + float output[8] = {0}; + int32_t output_dims[4] = {0}; + float expect[8] = {5, 21, 7, 23, 13, 29, 15, 31}; + int32_t expect_dims[4] = {1, 2, 2, 2}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + const int32_t kernels[] = {2, 2}; + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddArg("pooling_type", PoolingType::MAX) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + pooling_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestPoolingOpSameMax() { + float input[32] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + int32_t input_dims[4] = {1, 3, 3, 1}; + + float output[4] = {0}; + int32_t output_dims[4] = {0}; + float expect[4] = {4, 5, 7, 8}; + int32_t expect_dims[4] = {1, 2, 2, 1}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + const int32_t kernels[] = {2, 2}; + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddArg("pooling_type", PoolingType::MAX) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + pooling_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestPoolingOpValidDilation() { + float input[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int32_t input_dims[4] = {1, 4, 4, 1}; + + float output[4] = {0}; + int32_t output_dims[4] = {0}; + float expect[4] = {10, 11, 14, 15}; + int32_t expect_dims[4] = {1, 2, 2, 1}; + + const int32_t strides[] = {1, 1}; + const int32_t dilations[] = {2, 2}; + const int32_t kernels[] = {2, 2}; + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddArg("padding", 
Padding::VALID) + .AddArg("pooling_type", PoolingType::MAX) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + pooling_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestPoolingOpValidAvg() { + float input[32] = { + 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}; + int32_t input_dims[4] = {1, 4, 4, 2}; + + float output[8] = {0}; + int32_t output_dims[4] = {0}; + float expect[8] = {2.5, 18.5, 4.5, 20.5, 10.5, 26.5, 12.5, 28.5}; + int32_t expect_dims[4] = {1, 2, 2, 2}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + const int32_t kernels[] = {2, 2}; + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddArg("padding", Padding::VALID) + .AddArg("pooling_type", PoolingType::AVG) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + pooling_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +void TestPoolingOpSameAvg() { + float input[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int32_t input_dims[4] = {1, 2, 8, 1}; + + float output[4] = {0}; + int32_t output_dims[4] = {0}; + float expect[4] = {4.5, 6.5, 8.5, 10.5}; + int32_t expect_dims[4] = {1, 1, 4, 1}; + + const int32_t strides[] = {2, 2}; + const int32_t dilations[] = {1, 1}; + const int32_t kernels[] = {2, 2}; + + PoolingS4Op pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, 4) + .AddRepeatArg("strides", strides, sizeof(strides) / sizeof(int32_t)) + .AddRepeatArg("kernels", kernels, sizeof(kernels) / sizeof(int32_t)) + .AddArg("padding", Padding::SAME) + .AddArg("pooling_type", PoolingType::AVG) + .AddRepeatArg("dilations", dilations, sizeof(dilations) / sizeof(int32_t)) + .AddOutput(output, output_dims, 4); + + pooling_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + pooling_op.Run(); + + ExpectTensorNear(output, output_dims, 4, expect, expect_dims, 4, 1e-5); +} + +} // namespace + +TEST_F(PoolingOpTest, TestPoolingValidMax) { + TestPoolingOpValidMax(); +} + +TEST_F(PoolingOpTest, TestPoolingSameMax) { + TestPoolingOpSameMax(); +} + +TEST_F(PoolingOpTest, TestPoolingValidDilation) { + TestPoolingOpValidDilation(); +} + +TEST_F(PoolingOpTest, TestPoolingOpValidAvg) { + TestPoolingOpValidAvg(); +} + +TEST_F(PoolingOpTest, TestPoolingOpSameAvg) { + TestPoolingOpSameAvg(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/reduce_test.cc b/micro/test/ccunit/micro/ops/reduce_test.cc new file mode 100644 index 00000000..1b5cf88c --- /dev/null +++ b/micro/test/ccunit/micro/ops/reduce_test.cc @@ -0,0 +1,269 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/reduce.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ReduceOpTest : public ::testing::Test {}; + +namespace { +typedef ReduceOpBase::ReduceType ReduceType; + +void Simple( + const float *input, const int32_t *input_dims, + const int32_t input_dim_size, + const int32_t *axis, const int32_t axis_size, + float *output, int32_t *output_dims, const int32_t output_dim_size, + const float *expect, const int32_t *expect_dims, + ReduceType type, const bool keepdims = true) { + ReduceOp reduce_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddRepeatArg("axis", axis, axis_size) + .AddArg("keepdims", keepdims ? 1 : 0) + .AddArg("reduce_type", static_cast(type)) + .AddOutput(output, output_dims, output_dim_size); + + reduce_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + reduce_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5, 1e-3); +} + +void SimpleMean12Test() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 2; + const int32_t axis[axis_size] = {1, 2}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 1, 4}; + const float expect[8] = {10, 11, 12, 13, 10, 11, 12, 13}; + int32_t output_dims[output_dim_size] = {0}; + float output[8] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MEAN); +} + +void SimpleMin12Test() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 2; + const int32_t axis[axis_size] = {1, 2}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 1, 4}; + const float expect[8] = {0, 1, 2, 3, 0, 1, 2, 3}; + int32_t output_dims[output_dim_size] = {0}; + float output[8] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MIN); +} + +void SimpleMax12Test() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 2; + const int32_t axis[axis_size] = {1, 2}; + const int32_t 
output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 1, 4}; + const float expect[8] = {20, 21, 22, 23, 20, 21, 22, 23}; + int32_t output_dims[output_dim_size] = {0}; + float output[8] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MAX); +} + +void SimpleMean1Axis() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 1; + const int32_t axis[axis_size] = {1}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 3, 4}; + const float expect[24] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int32_t output_dims[output_dim_size] = {0}; + float output[24] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MEAN); +} + +void SimpleMin1Axis() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 1; + const int32_t axis[axis_size] = {1}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 3, 4}; + const float expect[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + int32_t output_dims[output_dim_size] = {0}; + float output[24] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MIN); +} + +void SimpleMax1Axis() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 1; + const int32_t axis[axis_size] = {1}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {2, 1, 3, 4}; + const float expect[24] = {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + int32_t output_dims[output_dim_size] = {0}; + float output[24] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MAX); +} + +void Simple2Axis() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {1, 2, 3, 4}; + const float input[24] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 2; + const int32_t axis[axis_size] = {0, 1}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {1, 1, 3, 4}; + const float expect[12] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int32_t output_dims[output_dim_size] = {0}; + float output[12] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, 
ReduceOpBase::MEAN); + + const int32_t input1_dim_size = 3; + const int32_t input1_dims[input1_dim_size] = {2, 3, 4}; + const int32_t axis1[axis_size] = {1, 2}; + const int32_t output1_dim_size = 3; + const int32_t expect1_dims[output1_dim_size] = {2, 1, 1}; + const float expect1[2] = {5.5, 17.5}; + int32_t output1_dims[output_dim_size] = {0}; + float output1[2] = {0}; + Simple(input, input1_dims, input1_dim_size, axis1, axis_size, + output1, output1_dims, output1_dim_size, + expect1, expect1_dims, ReduceOpBase::MEAN); + + const int32_t axis2[axis_size] = {0, 2}; + const int32_t expect2_dims[output_dim_size] = {1, 2, 1, 4}; + const float expect2[8] = {4, 5, 6, 7, 16, 17, 18, 19}; + Simple(input, input_dims, input_dim_size, axis2, axis_size, + output, output_dims, output_dim_size, + expect2, expect2_dims, ReduceOpBase::MEAN); +} + +void Simple3Axis() { + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {1, 2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 3; + const int32_t axis[axis_size] = {1, 2, 3}; + const int32_t output_dim_size = 4; + const int32_t expect_dims[output_dim_size] = {1, 1, 1, 1}; + const float expect[1] = {11.5}; + int32_t output_dims[output_dim_size] = {0}; + float output[1] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MEAN); +} + +void CPUSimpleReduceDims() { + const int32_t input_dim_size = 3; + const int32_t input_dims[input_dim_size] = {2, 3, 4}; + const float input[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t axis_size = 2; + const int32_t axis[axis_size] = {0, 1}; + const int32_t output_dim_size = 1; + const int32_t expect_dims[output_dim_size] = {4}; + const float expect[4] = {10, 11, 12, 13}; + int32_t output_dims[output_dim_size] = {0}; + float output[4] = {0}; + Simple(input, input_dims, input_dim_size, axis, axis_size, + output, output_dims, output_dim_size, + expect, expect_dims, ReduceOpBase::MEAN, false); +} + +} // namespace + +TEST_F(ReduceOpTest, CPUSimple12) { + SimpleMean12Test(); + SimpleMin12Test(); + SimpleMax12Test(); +} + + +TEST_F(ReduceOpTest, CPUSimple1Axis) { + SimpleMean1Axis(); + SimpleMin1Axis(); + SimpleMax1Axis(); +} + +TEST_F(ReduceOpTest, CPUSimple2Axis) { + Simple2Axis(); +} + +TEST_F(ReduceOpTest, CPUSimple3Axis) { + Simple3Axis(); +} + +TEST_F(ReduceOpTest, CPUSimpleReduceDims) { + CPUSimpleReduceDims(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/reshape_test.cc b/micro/test/ccunit/micro/ops/reshape_test.cc new file mode 100644 index 00000000..aa05281d --- /dev/null +++ b/micro/test/ccunit/micro/ops/reshape_test.cc @@ -0,0 +1,68 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
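// All of the op unit tests in this patch share one harness pattern: build a
// framework::SubstituteOp describing the raw input/output buffers and any
// named arguments, hand it to the op through Operator::Init() disguised as an
// OpContext, call Run(), then compare the result against a hand-computed
// expectation with ExpectTensorNear(). The helper below is an illustrative
// sketch of that pattern, not code from this patch: RunSimpleOpTest is a
// made-up name, and any template arguments the real tests pass to op classes
// are assumed to be supplied by the caller through OpType.

#include "gtest/gtest.h"
#include "micro/framework/op_context.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"

namespace micro {
namespace ops {
namespace test {
namespace {

template <typename OpType>
void RunSimpleOpTest(OpType *op,
                     const float *input, const int32_t *input_dims,
                     const int32_t input_dim_size,
                     float *output, int32_t *output_dims,
                     const int32_t output_dim_size,
                     const float *expect, const int32_t *expect_dims) {
  // Describe the tensors the op should see.
  framework::SubstituteOp substitude_op;
  substitude_op.AddInput(input, input_dims, input_dim_size)
      .AddOutput(output, output_dims, output_dim_size);

  // The test build of framework::Operator treats the OpContext pointer as a
  // SubstituteOp and forwards every accessor back to it (see operator.test.cc
  // later in this patch).
  op->Init(NULL,
           reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
  op->Run();

  // Same tolerances as the reduce tests above: rel_err = 1e-5, abs_err = 1e-3.
  ExpectTensorNear(output, output_dims, output_dim_size,
                   expect, expect_dims, output_dim_size, 1e-5, 1e-3);
}

}  // namespace
}  // namespace test
}  // namespace ops
}  // namespace micro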
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/reshape.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ReshapeOpTest : public ::testing::Test {}; + +namespace { + +template +void TestReshapeOp( + const T *input, const int32_t *input_dims, const uint32_t input_dim_size, + const int32_t *shape, const int32_t *shape_dims, + T *y, int32_t *y_dims, const uint32_t y_dim_size, + const T *e, const int32_t *e_dims, const uint32_t e_dim_size) { + + ReshapeOp reshape_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddInput(shape, shape_dims, 1) + .AddOutput(y, y_dims, y_dim_size); + + reshape_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + reshape_op.Run(); + + ExpectTensorNear(y, y_dims, y_dim_size, e, e_dims, e_dim_size); +} + +} // namespace + +TEST_F(ReshapeOpTest, TestReshape) { + MACE_DEFINE_RANDOM_INPUT(float, x, 6); + int32_t x_dims[3] = {1, 2, 3}; + int32_t shape[2] = {3, 2}; + int32_t shape_dims[1] = {2}; + + float y[6] = {0}; + int32_t y_dims[2] = {0}; + + int32_t e_dims[2] = {3, 2}; + + TestReshapeOp(x, x_dims, 3, shape, shape_dims, + y, y_dims, 2, x, e_dims, 2); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/shape_test.cc b/micro/test/ccunit/micro/ops/shape_test.cc new file mode 100644 index 00000000..ea8f3c7c --- /dev/null +++ b/micro/test/ccunit/micro/ops/shape_test.cc @@ -0,0 +1,62 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
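// Worked example for the reshape test above: ReshapeOp only rewrites the
// reported dimensions, so a 1x2x3 input laid out flat as {x0, x1, x2, x3, x4,
// x5} reshaped to {3, 2} keeps exactly the same flat data. That is why the
// test can pass the input buffer itself as the expected output (e == x) and
// only the expected dims change to {3, 2}.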
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/shape.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class ShapeOpTest : public ::testing::Test {}; + +namespace { + +template +void TestShapeOp( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + RES_TYPE *y, int32_t *y_dims, const uint32_t y_dim_size, + const RES_TYPE *e, const int32_t *e_dims, const uint32_t e_dim_size) { + + ShapeOp shape_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(x, x_dims, x_dim_size) + .AddOutput(y, y_dims, y_dim_size); + + shape_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + shape_op.Run(); + + ExpectTensorNear(y, y_dims, y_dim_size, e, e_dims, e_dim_size); +} + +} // namespace + +TEST_F(ShapeOpTest, TestShape) { + MACE_DEFINE_RANDOM_INPUT(float, x, 6); + int32_t x_dims[3] = {1, 2, 3}; + int32_t y[3] = {0}; + int32_t y_dims[1] = {0}; + int32_t e[3] = {1, 2, 3}; + int32_t e_dims[1] = {3}; + + TestShapeOp(x, x_dims, 3, y, y_dims, 1, e, e_dims, 1); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/softmax_test.cc b/micro/test/ccunit/micro/ops/softmax_test.cc new file mode 100644 index 00000000..0590256f --- /dev/null +++ b/micro/test/ccunit/micro/ops/softmax_test.cc @@ -0,0 +1,63 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/softmax.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class SoftmaxOpTest : public ::testing::Test {}; + +namespace { +void Simple(bool use_log = false) { + const float input[8] = {1, 1, 1, 1, 1, 2, 3, 4}; + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {1, 1, 2, 4}; + float output[8] = {0}; + const int32_t output_dim_size = 4; + int32_t output_dims[output_dim_size] = {0}; + const int32_t expect_dims[output_dim_size] = {1, 1, 2, 4}; + float expected_data1[8] = {-1.3862944, -1.3862944, -1.3862944, -1.3862944, + -3.4401896, -2.4401896, -1.4401897, -0.44018975}; + float expected_data2[8] = {0.25, 0.25, 0.25, 0.25, + 0.0320586, 0.08714432, 0.23688282, 0.6439142}; + float *expect = use_log ? 
expected_data1 : expected_data2; + + SoftmaxOp softmax_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(output, output_dims, output_dim_size); + + softmax_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + softmax_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +} // namespace + +TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } +TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); } + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/squeeze_test.cc b/micro/test/ccunit/micro/ops/squeeze_test.cc new file mode 100644 index 00000000..101dd11f --- /dev/null +++ b/micro/test/ccunit/micro/ops/squeeze_test.cc @@ -0,0 +1,89 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/squeeze.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class SqueezeOpTest : public ::testing::Test {}; + +namespace { + +void TestSqueeze( + const float *input, const int32_t *input_dims, + const int32_t input_dim_size, + const int32_t *axis, + const int32_t axis_size, + float *output, int32_t *output_dims, const int32_t output_dim_size, + const float *expect, const int32_t *expect_dims) { + SqueezeOp squeeze_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddOutput(output, output_dims, output_dim_size); + if (axis != NULL && axis_size > 0) { + substitude_op.AddRepeatArg("axis", axis, axis_size); + } + + squeeze_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + squeeze_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +void TestSqueeze() { + MACE_DEFINE_RANDOM_INPUT(float, input, 8); + const int32_t dims1214[] = {1, 2, 1, 4}; + const int32_t dims24[] = {2, 4}; + const int32_t dims124[] = {1, 2, 4}; + const int32_t dims1411[] = {1, 4, 1, 1}; + const int32_t dims141[] = {1, 4, 1}; + + float output[8] = {0}; + int32_t output_dims[10] = {0}; + + TestSqueeze(input, dims1214, 4, NULL, 0, + output, output_dims, 2, input, dims24); + + int32_t axis_size = 1; + int32_t axis[] = {1}; + TestSqueeze(input, dims1214, 4, axis, axis_size, + output, output_dims, 4, input, dims1214); + + int32_t axis2[] = {2}; + TestSqueeze(input, dims1214, 4, axis2, axis_size, + output, output_dims, 3, input, dims124); + + MACE_DEFINE_RANDOM_INPUT(float, input3, 4); + int32_t axis3[2] = {1, 2}; + TestSqueeze(input, dims1411, 4, axis3, 2, + output, output_dims, 3, input, dims141); +} + +} // namespace + +TEST_F(SqueezeOpTest, TestSqueeze) { + TestSqueeze(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git 
a/micro/test/ccunit/micro/ops/stack_test.cc b/micro/test/ccunit/micro/ops/stack_test.cc new file mode 100644 index 00000000..0632971f --- /dev/null +++ b/micro/test/ccunit/micro/ops/stack_test.cc @@ -0,0 +1,140 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/stack.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class StackOpTest : public ::testing::Test {}; + +namespace { + +void TestStack( + const float **inputs, const int32_t inputs_size, const int32_t *input_dims, + const int32_t input_dim_size, int axis, + float *output, int32_t *output_dims, const int32_t output_dim_size, + const float *expect, const int32_t *expect_dims) { + StackOp stack_op; + framework::SubstituteOp substitude_op; + substitude_op.AddArg("axis", axis) + .AddOutput(output, output_dims, output_dim_size); + for (int32_t i = 0; i < inputs_size; ++i) { + substitude_op.AddInput(inputs[i], input_dims, input_dim_size); + } + + stack_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + stack_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size, 1e-5); +} + +void TestStackScalar() { + const float input0[1] = {1}; + const float input1[1] = {2}; + const float input2[1] = {3}; + const int32_t axis = 0; + + float output[3] = {0}; + const int32_t output_dim_size = 1; + int32_t output_dims[output_dim_size] = {0}; + const float expect[3] = {1, 2, 3}; + const int32_t expect_dims[output_dim_size] = {3}; + + const float *inputs[] = {input0, input1, input2}; + TestStack(inputs, 3, NULL, 0, axis, + output, output_dims, output_dim_size, expect, expect_dims); +} + +void TestStackVector() { + const float input0[] = {1, 4}; + const float input1[] = {2, 5}; + const float input2[] = {3, 6}; + const int32_t input_dim_size = 1; + const int32_t input_dims[input_dim_size] = {2}; + int32_t axis = 0; + + float output[6] = {0}; + const int32_t output_dim_size = 2; + int32_t output_dims[output_dim_size] = {0}; + const float expect[6] = {1, 4, 2, 5, 3, 6}; + const int32_t expect_dims[output_dim_size] = {3, 2}; + + const float *inputs[] = {input0, input1, input2}; + TestStack(inputs, 3, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect, expect_dims); + + axis = -2; + TestStack(inputs, 3, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect, expect_dims); + + axis = -1; + const float expect2[6] = {1, 2, 3, 4, 5, 6}; + const int32_t expect_dims2[output_dim_size] = {2, 3}; + TestStack(inputs, 3, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect2, expect_dims2); +} + +void TestStackHighRank() { + const float input0[] = {1, 2, 3, 4, 5, 6}; + const float input1[] = {7, 8, 9, 10, 11, 12}; + const int32_t input_dim_size = 2; + const int32_t input_dims[input_dim_size] = {2, 
3}; + int32_t axis = -3; + + float output[12] = {0}; + const int32_t output_dim_size = 3; + int32_t output_dims[output_dim_size] = {0}; + const float expect[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + const int32_t expect_dims[output_dim_size] = {2, 2, 3}; + + const float *inputs[] = {input0, input1}; + TestStack(inputs, 2, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect, expect_dims); + + axis = 1; + const float expect1[12] = {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}; + TestStack(inputs, 2, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect1, expect_dims); + + axis = 2; + const int32_t expect_dims2[output_dim_size] = {2, 3, 2}; + const float expect2[12] = {1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12}; + TestStack(inputs, 2, input_dims, input_dim_size, axis, + output, output_dims, output_dim_size, expect2, expect_dims2); +} +} // namespace + +TEST_F(StackOpTest, TestStackScalar) { + TestStackScalar(); +} + +TEST_F(StackOpTest, TestStackVector) { + TestStackVector(); +} + +TEST_F(StackOpTest, TestStackHighRank) { + TestStackHighRank(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccunit/micro/ops/strided_slice_test.cc b/micro/test/ccunit/micro/ops/strided_slice_test.cc new file mode 100644 index 00000000..c7667b06 --- /dev/null +++ b/micro/test/ccunit/micro/ops/strided_slice_test.cc @@ -0,0 +1,495 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
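// The strided-slice tests below follow TensorFlow-style semantics: for each
// dimension i the op keeps elements begin[i], begin[i] + strides[i], ... up
// to (but excluding) end[i]; a set bit i in begin_mask / end_mask means
// "ignore begin[i] / end[i] and use the full range", and a set bit in
// shrink_axis_mask drops that dimension from the output. Worked example from
// TestStridedSliceByFirstAxis below: an input of dims {2, 3, 2} with
// begin {1, 0, 0}, end {2, 3, 2}, strides {1, 1, 1} keeps only the second
// 3x2 slab {7, 8, 9, 10, 11, 12} with output dims {1, 3, 2}; with
// shrink_axis_mask = 1 the leading axis is removed and the same data is
// reported with dims {3, 2}.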
+ +#include "gtest/gtest.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/strided_slice.h" +#include "micro/ops/substitute_op.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +class StridedSliceOpTest : public ::testing::Test {}; + +namespace { + +void TestStridedSlice( + const float *input, const int32_t *input_dims, const int32_t input_dim_size, + const int32_t *begin_indices, const int32_t *end_indices, + const int32_t *strides, + const int32_t *indices_dims, const int32_t indices_dim_size, + const int32_t begin_mask, const int32_t end_mask, + const int32_t ellipsis_mask, const int32_t new_axis_mask, + const int32_t shrink_axis_mask, const int32_t output_dim_size, + float *output, int32_t *output_dims, + const float *expect, const int32_t *expect_dims) { + StridedSliceOp strided_slice_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddInput(begin_indices, indices_dims, indices_dim_size) + .AddInput(end_indices, indices_dims, indices_dim_size) + .AddInput(strides, indices_dims, indices_dim_size) + .AddArg("begin_mask", begin_mask) + .AddArg("end_mask", end_mask) + .AddArg("ellipsis_mask", ellipsis_mask) + .AddArg("new_axis_mask", new_axis_mask) + .AddArg("shrink_axis_mask", shrink_axis_mask) + .AddOutput(output, output_dims, output_dim_size); + + strided_slice_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + strided_slice_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size); +} + +void TestSlice( + const float *input, const int32_t *input_dims, const int32_t input_dim_size, + const int32_t *begin_indices, const int32_t *indice_sizes, + const int32_t *indices_dims, const int32_t indices_dim_size, + float *output, int32_t *output_dims, const int32_t output_dim_size, + const float *expect, const int32_t *expect_dims) { + StridedSliceOp strided_slice_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddInput(begin_indices, indices_dims, indices_dim_size) + .AddInput(indice_sizes, indices_dims, indices_dim_size) + .AddArg("slice", 1) + .AddOutput(output, output_dims, output_dim_size); + + strided_slice_op.Init(NULL, reinterpret_cast( + &substitude_op), NULL); + strided_slice_op.Run(); + + ExpectTensorNear(output, output_dims, output_dim_size, + expect, expect_dims, output_dim_size); +} + +void TestStridedSliceByFirstAxis() { + const float input[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + const int32_t begin_indices[] = {1, 0, 0}; + const int32_t end_indices[] = {2, 3, 2}; + const int32_t strides[] = {1, 1, 1}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {3}; + const int32_t input_dim_size = 3; + const int32_t input_dims[input_dim_size] = {2, 3, 2}; + + float output[6] = {0}; + const int32_t output_dim_size = 3; + int32_t output_dims[output_dim_size] = {0}; + const float expect[6] = {7, 8, 9, 10, 11, 12}; + const int32_t expect_dims[output_dim_size] = {1, 3, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const int32_t output_dim_size1 = 2; + int32_t output_dims1[output_dim_size1] = {0}; + const int32_t expect_dims1[output_dim_size1] = {3, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + 
indices_dims, indices_dim_size, + 0, 0, 0, 0, 1, output_dim_size1, + output, output_dims1, expect, expect_dims1); + + const int32_t begin_indices2[] = {1, 1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices, strides, + indices_dims, indices_dim_size, + 6, 6, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); +} + +void TestStridedSliceRank1() { + const float input[] = {1, 2, 3, 4}; + const int32_t begin_indices[] = {1}; + const int32_t end_indices[] = {3}; + const int32_t strides[] = {1}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {1}; + const int32_t input_dim_size = 1; + const int32_t input_dims[input_dim_size] = {4}; + + float output[4] = {0}; + const int32_t output_dim_size = 1; + int32_t output_dims[output_dim_size] = {0}; + const float expect[2] = {2, 3}; + const int32_t expect_dims[output_dim_size] = {2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const int32_t begin_indices1[] = {-3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices1, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const int32_t begin_indices2[] = {-2}; + const int32_t end_indices2[] = {-4}; + const int32_t strides2[] = {-1}; + const float expect2[2] = {3, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices2, strides2, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect2, expect_dims); + + const int32_t begin_indices3[] = {-1}; + const int32_t strides3[] = {-2}; + const float expect3[2] = {4, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices3, end_indices2, strides3, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect3, expect_dims); + + const int32_t begin_indices4[] = {-1}; + const int32_t strides4[] = {-2}; + const float expect4[2] = {4, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices4, end_indices2, strides4, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect4, expect_dims); + + const float expect5[3] = {4, 3, 2}; + const int32_t expect_dims5[output_dim_size] = {3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices2, strides2, + indices_dims, indices_dim_size, + 1, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect5, expect_dims5); + + const float expect6[3] = {3, 2, 1}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices2, strides2, + indices_dims, indices_dim_size, + 0, 1, 0, 0, 0, output_dim_size, + output, output_dims, expect6, expect_dims5); + + const float expect7[4] = {4, 3, 2, 1}; + const int32_t expect_dims7[output_dim_size] = {4}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices2, strides2, + indices_dims, indices_dim_size, + 1, 1, 0, 0, 0, output_dim_size, + output, output_dims, expect7, expect_dims7); + + const int32_t begin_indices8[] = {2}; + const int32_t end_indices8[] = {4}; + const int32_t strides8[] = {2}; + const float expect8[2] = {1, 3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices8, end_indices8, strides8, + indices_dims, indices_dim_size, + 1, 1, 0, 0, 0, 
output_dim_size, + output, output_dims, expect8, expect_dims); + + const int32_t output_dim_size9 = 0; + int32_t output_dims9[] = {1}; + const float expect9[] = {3}; + const int32_t *expect_dims9 = NULL; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices8, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 1, output_dim_size9, + output, output_dims9, expect9, expect_dims9); +} + +void TestStridedSliceRank2() { + const float input[] = {1, 2, 3, 4, 5, 6}; + const int32_t begin_indices[] = {0, 0}; + const int32_t end_indices[] = {2, 3}; + const int32_t strides[] = {1, 1}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {2}; + const int32_t input_dim_size = 2; + const int32_t input_dims[input_dim_size] = {2, 3}; + + float output[6] = {0}; + const int32_t output_dim_size = 2; + int32_t output_dims[output_dim_size] = {0}; + const float expect[6] = {1, 2, 3, 4, 5, 6}; + const int32_t expect_dims[output_dim_size] = {2, 3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const int32_t begin_indices1[] = {0}; + const int32_t end_indices1[] = {2}; + const int32_t strides1[] = {1}; + const int32_t indices_dims1[indices_dim_size] = {1}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices1, end_indices1, strides1, + indices_dims1, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const int32_t begin_indices2[] = {1, 1}; + const float expect2[2] = {5, 6}; + const int32_t expect_dims2[output_dim_size] = {1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices2, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect2, expect_dims2); + + const int32_t strides3[] = {1, 2}; + const float expect3[4] = {1, 3, 4, 6}; + const int32_t expect_dims3[output_dim_size] = {2, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides3, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect3, expect_dims3); + + const int32_t begin_indices4[] = {1, 2}; + const int32_t end_indices4[] = {0, 0}; + const int32_t strides4[] = {-1, -1}; + const float expect4[2] = {6, 5}; + const int32_t expect_dims4[output_dim_size] = {1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices4, end_indices4, strides4, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect4, expect_dims4); + + const float expect5[6] = {6, 5, 4, 3, 2, 1}; + const int32_t expect_dims5[output_dim_size] = {2, 3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices4, end_indices4, strides4, + indices_dims, indices_dim_size, + 3, 3, 0, 0, 0, output_dim_size, + output, output_dims, expect5, expect_dims5); + + const int32_t begin_indices6[] = {1, 0}; + const int32_t end_indices6[] = {2, 3}; + const int32_t strides6[] = {1, 1}; + const float expect6[3] = {4, 5, 6}; + const int32_t output_dim_size6 = 1; + const int32_t expect_dims6[output_dim_size6] = {3}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices6, end_indices6, strides6, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 1, output_dim_size6, + output, output_dims, expect6, expect_dims6); + + const int32_t begin_indices7[] = {1, 2}; + const float 
expect7[1] = {6}; + const int32_t output_dim_size7 = 0; + const int32_t *expect_dims7 = NULL; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices7, end_indices6, strides6, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 3, output_dim_size7, + output, output_dims, expect7, expect_dims7); +} + +void TestStridedSliceRank3() { + const float input[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + const int32_t begin_indices[] = {0, 0, 0}; + const int32_t end_indices[] = {2, 3, 2}; + const int32_t strides[] = {1, 2, 1}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {3}; + const int32_t input_dim_size = 3; + const int32_t input_dims[input_dim_size] = {2, 3, 2}; + + float output[8] = {0}; + const int32_t output_dim_size = 3; + int32_t output_dims[output_dim_size] = {0}; + const float expect[8] = {1, 2, 5, 6, 7, 8, 11, 12}; + const int32_t expect_dims[output_dim_size] = {2, 2, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const float input1[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; + const int32_t begin_indices1[] = {1, 0, 0}; + const int32_t end_indices1[] = {2, 1, 3}; + const int32_t strides1[] = {1, 1, 1}; + const int32_t input_dims1[input_dim_size] = {3, 2, 3}; + const float expect1[3] = {3, 3, 3}; + const int32_t expect_dims1[output_dim_size] = {1, 1, 3}; + TestStridedSlice(input1, input_dims1, input_dim_size, + begin_indices1, end_indices1, strides1, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect1, expect_dims1); + + const int32_t begin_indices2[] = {0, 0, 0}; + const int32_t end_indices2[] = {2, 2, 2}; + const int32_t strides2[] = {1, 2, 1}; + const float expect2[4] = {1, 1, 3, 3}; + const int32_t expect_dims2[output_dim_size] = {2, 1, 2}; + TestStridedSlice(input1, input_dims1, input_dim_size, + begin_indices2, end_indices2, strides2, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect2, expect_dims2); +} + +void TestStridedSliceRank4() { + const float input[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + const int32_t begin_indices[] = {1, 0, 1, 0}; + const int32_t end_indices[] = {2, 2, 2, 2}; + const int32_t strides[] = {1, 1, 1, 1}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {4}; + const int32_t input_dim_size = 4; + const int32_t input_dims[input_dim_size] = {2, 2, 2, 3}; + + float output[8] = {0}; + const int32_t output_dim_size = 4; + int32_t output_dims[output_dim_size] = {0}; + const float expect[8] = {15, 16, 21, 22}; + const int32_t expect_dims[output_dim_size] = {1, 2, 1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect, expect_dims); + + const float expect1[8] = {3, 4, 9, 10, 15, 16, 21, 22}; + const int32_t expect_dims1[output_dim_size] = {2, 2, 1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 3, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect1, expect_dims1); + + const float expect2[8] = {15, 16, 17, 21, 22, 23}; + const int32_t expect_dims2[output_dim_size] = {1, 2, 1, 3}; + TestStridedSlice(input, input_dims, 
input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 8, 0, 0, 0, output_dim_size, + output, output_dims, expect2, expect_dims2); + + const float expect3[8] = {15, 21}; + const int32_t output_dim_size3 = 3; + const int32_t expect_dims3[output_dim_size3] = {1, 2, 1}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 8, 0, 0, 8, output_dim_size3, + output, output_dims, expect3, expect_dims3); + + const float expect4[8] = {15}; + const int32_t output_dim_size4 = 0; + const int32_t *expect_dims4 = NULL; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices, end_indices, strides, + indices_dims, indices_dim_size, + 0, 8, 0, 0, 15, output_dim_size4, + output, output_dims, expect4, expect_dims4); + + const int32_t begin_indices5[] = {-1, 2, 1, 3}; + const int32_t end_indices5[] = {0, 0, 0, 0}; + const int32_t strides5[] = {-1, -1, -1, -1}; + + const float expect5[2] = {23, 22}; + const int32_t expect_dims5[output_dim_size] = {1, 1, 1, 2}; + TestStridedSlice(input, input_dims, input_dim_size, + begin_indices5, end_indices5, strides5, + indices_dims, indices_dim_size, + 0, 0, 0, 0, 0, output_dim_size, + output, output_dims, expect5, expect_dims5); +} + +void TestSlice() { + const float input[] = {1, 2, 3, 4, 5, 6}; + const int32_t begin_indices[] = {0, 0}; + const int32_t indice_sizes[] = {2, 3}; + const int32_t indices_dim_size = 1; + const int32_t indices_dims[indices_dim_size] = {2}; + const int32_t input_dim_size = 2; + const int32_t input_dims[input_dim_size] = {2, 3}; + + float output[6] = {0}; + const int32_t output_dim_size = 2; + int32_t output_dims[output_dim_size] = {0}; + const float expect[6] = {1, 2, 3, 4, 5, 6}; + const int32_t expect_dims[output_dim_size] = {2, 3}; + TestSlice(input, input_dims, input_dim_size, + begin_indices, indice_sizes, + indices_dims, indices_dim_size, + output, output_dims, output_dim_size, + expect, expect_dims); + + const int32_t begin_indices1[] = {1, 0}; + const int32_t indice_sizes1[] = {1, 2}; + const float expect1[2] = {4, 5}; + const int32_t expect_dims1[output_dim_size] = {1, 2}; + TestSlice(input, input_dims, input_dim_size, + begin_indices1, indice_sizes1, + indices_dims, indices_dim_size, + output, output_dims, output_dim_size, + expect1, expect_dims1); + + const int32_t begin_indices2[] = {0, 1}; + const int32_t indice_sizes2[] = {2, -1}; + const float expect2[4] = {2, 3, 5, 6}; + const int32_t expect_dims2[output_dim_size] = {2, 2}; + TestSlice(input, input_dims, input_dim_size, + begin_indices2, indice_sizes2, + indices_dims, indices_dim_size, + output, output_dims, output_dim_size, + expect2, expect_dims2); +} + +} // namespace + + +TEST_F(StridedSliceOpTest, TestStridedSliceByFirstAxis) { + TestStridedSliceByFirstAxis(); +} + +TEST_F(StridedSliceOpTest, TestStridedSliceRank1) { + TestStridedSliceRank1();} + +TEST_F(StridedSliceOpTest, TestStridedSliceRank2) { + TestStridedSliceRank2(); +} + +TEST_F(StridedSliceOpTest, TestStridedSliceRank3) { + TestStridedSliceRank3(); +} + +TEST_F(StridedSliceOpTest, TestStridedSliceRank4) { + TestStridedSliceRank4(); +} + +TEST_F(StridedSliceOpTest, TestSlice) { + TestSlice(); +} + +} // namespace test +} // namespace ops +} // namespace micro diff --git a/micro/test/ccutils/BUILD.bazel b/micro/test/ccutils/BUILD.bazel new file mode 100644 index 00000000..055657f2 --- /dev/null +++ b/micro/test/ccutils/BUILD.bazel @@ -0,0 +1,101 @@ +package( + default_visibility = 
["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "qaic", + srcs = ["rpc/qaic.sh"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "rpc_stub", + srcs = glob([ + "rpc/stub/*.cc", + ]), + hdrs = glob([ + "rpc/stub/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + strip_include_prefix = "", + deps = [ + "@hexagon_sdk//:headers_arm", + ], + alwayslink = 1, +) + +cc_library( + name = "rpc_skel", + srcs = glob([ + "rpc/skel/*.cc", + ]), + hdrs = glob([ + "rpc/skel/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + strip_include_prefix = "", + deps = [ + "@hexagon_sdk//:headers_dsp", + ], + alwayslink = 1, +) + +cc_library( + name = "ccutils", + srcs = glob([ + "micro/common/*.cc", + "micro/ops/*.cc", + ]), + hdrs = glob( + [ + "micro/common/*.h", + "micro/ops/*.h", + ], + exclude = ["micro/ops/gtest_utils.h"], + ), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + strip_include_prefix = "", + deps = [ + "//micro/base", + "//micro/framework:framework_for_optest", + "//micro/include", + ], +) + +cc_library( + name = "ccutils_with_gtest", + srcs = glob([ + "micro/common/*.cc", + "micro/ops/*.cc", + ]), + hdrs = glob([ + "micro/common/*.h", + "micro/ops/*.h", + ]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], + strip_include_prefix = "", + deps = [ + "//micro/base", + "//micro/framework:framework_for_optest", + "//micro/include", + "@gtest", + ], +) diff --git a/micro/test/ccutils/micro/common/global_buffer.cc b/micro/test/ccutils/micro/common/global_buffer.cc new file mode 100644 index 00000000..92a2158d --- /dev/null +++ b/micro/test/ccutils/micro/common/global_buffer.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/common/global_buffer.h" + +#include "micro/base/logging.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace common { +namespace test { + +namespace { +// for N=1, H=128, W=128, C=4, INPUT1&INPUT2&OUTPUT, sizeof(float) +const uint32_t kGlobalBufferSize = 128 * 128 * 4 * 3 * 4; +uint8_t kGlobalBuffer[kGlobalBufferSize]; +GlobalBuffer global_buffer; +} + +GlobalBuffer::GlobalBuffer() : offset_(0) {} +GlobalBuffer::~GlobalBuffer() {} + +void GlobalBuffer::reset() { + offset_ = 0; +} + +void *GlobalBuffer::DoGetBuffer(uint32_t size) { + if (size % 4 != 0) { + size = (size + 3) / 4 * 4; + } + if (offset_ + size > kGlobalBufferSize) { + LOG(FATAL) << "Global buffer is not enough." 
+ << "offset_: " << offset_ << ", size: " << size + << ", kGlobalBufferSize: " << kGlobalBufferSize; + } + + void *ptr = kGlobalBuffer + offset_; + offset_ += size; + + return ptr; +} + +GlobalBuffer *GetGlobalBuffer() { + return &global_buffer; +} + +} // namespace test +} // namespace common +} // namespace micro diff --git a/micro/test/ccutils/micro/common/global_buffer.h b/micro/test/ccutils/micro/common/global_buffer.h new file mode 100644 index 00000000..d61a64a4 --- /dev/null +++ b/micro/test/ccutils/micro/common/global_buffer.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_ +#define MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_ + +#include "micro/base/logging.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace common { +namespace test { + +class GlobalBuffer { + public: + GlobalBuffer(); + ~GlobalBuffer(); + + void reset(); + + template + T *GetBuffer(int32_t size) { + MACE_ASSERT(size > 0); + return static_cast( + DoGetBuffer(static_cast(size) * sizeof(T))); + } + + template + T *GetBuffer(uint32_t size) { + return static_cast(DoGetBuffer(size * sizeof(T))); + } + + private: + void *DoGetBuffer(uint32_t size); + + private: + uint32_t offset_; +}; + +GlobalBuffer *GetGlobalBuffer(); + +} // namespace test +} // namespace common +} // namespace micro + +#endif // MICRO_TEST_CCUTILS_MICRO_COMMON_GLOBAL_BUFFER_H_ diff --git a/micro/test/ccutils/micro/ops/gtest_utils.h b/micro/test/ccutils/micro/ops/gtest_utils.h new file mode 100644 index 00000000..7dd6deab --- /dev/null +++ b/micro/test/ccutils/micro/ops/gtest_utils.h @@ -0,0 +1,161 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_ +#define MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_ + +#include "gtest/gtest.h" +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/include/public/micro.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/test_utils.h" + +namespace micro { +namespace ops { +namespace test { + +template +inline void ExpectEqual(const T &a, const T &b) { + EXPECT_EQ(a, b); +} + +template<> +inline void ExpectEqual(const float &a, const float &b) { + EXPECT_FLOAT_EQ(a, b); +} + +template<> +inline void ExpectEqual(const double &a, const double &b) { + EXPECT_DOUBLE_EQ(a, b); +} + +template +struct Expector; + +// Partial specialization for float and double. +template +struct Expector { + static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); } + + static void Equal( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size) { + AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size); + const int32_t size = base::GetShapeSize(x_dim_size, x_dims); + for (int32_t i = 0; i < size; ++i) { + ExpectEqual(x[i], y[i]); + } + } + + static void Near( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size, + const double rel_err, const double abs_err) { + AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size); + if (x_dim_size == 4) { + for (int32_t n = 0; n < x_dims[0]; ++n) { + for (int32_t h = 0; h < x_dims[1]; ++h) { + for (int32_t w = 0; w < x_dims[2]; ++w) { + for (int32_t c = 0; c < x_dims[3]; ++c) { + const double error = abs_err + rel_err * base::abs(*x); + EXPECT_NEAR(*x, *y, error) << "with index = [" << n << ", " << h + << ", " << w << ", " << c << "]"; + x++; + y++; + } + } + } + } + } else { + const int32_t size = base::GetShapeSize(x_dim_size, x_dims); + for (int32_t i = 0; i < size; ++i) { + const double error = abs_err + rel_err * base::abs(x[i]); + EXPECT_NEAR(x[i], y[i], error); + } + } + } +}; + +template +struct Expector { + static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); } + + static void Equal( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size) { + AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size); + const int32_t size = base::GetShapeSize(x_dim_size, x_dims); + for (int32_t i = 0; i < size; ++i) { + ExpectEqual(x[i], y[i]); + } + } + + static void Near( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size, + const double rel_err, const double abs_err) { + MACE_UNUSED(rel_err); + MACE_UNUSED(abs_err); + Equal(x, x_dims, x_dim_size, y, y_dims, y_dim_size); + } +}; + +template +void ExpectTensorNear( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size, + const double rel_err = 1e-5, const double abs_err = 1e-8) { + Expector::Near(x, x_dims, x_dim_size, y, + y_dims, y_dim_size, rel_err, abs_err); +} + +template +void ExpectTensorNear( + const T *x, const int32_t *x_dims, const uint32_t x_dim_size, + const T *y, const int32_t *y_dims, const uint32_t y_dim_size, + const double rel_err = 1e-5, const double abs_err = 1e-8) { + Expector::Near(x, x_dims, x_dim_size, y, + y_dims, y_dim_size, rel_err, abs_err); +} + +template 
+void ExpectTensorSimilar( + const EXP_TYPE *x, const int32_t *x_dims, const uint32_t x_dim_size, + const RES_TYPE *y, const int32_t *y_dims, const uint32_t y_dim_size, + const double rel_err = 1e-5) { + AssertSameDims(x_dims, x_dim_size, y_dims, y_dim_size); + const int32_t size = base::GetShapeSize(x_dim_size, x_dims); + double dot_product = 0.0, x_norm = 0.0, y_norm = 0.0; + for (int32_t i = 0; i < size; i++) { + dot_product += x[i] * y[i]; + x_norm += x[i] * x[i]; + y_norm += y[i] * y[i]; + } + double norm_product = base::sqrt(x_norm) * base::sqrt(y_norm); + double error = rel_err * base::abs(dot_product); + + EXPECT_NEAR(dot_product, norm_product, error); + PrintDims(x_dims, x_dim_size); +} + +} // namespace test +} // namespace ops +} // namespace micro + +#endif // MICRO_TEST_CCUTILS_MICRO_OPS_GTEST_UTILS_H_ + diff --git a/micro/test/ccutils/micro/ops/operator.test.cc b/micro/test/ccutils/micro/ops/operator.test.cc new file mode 100644 index 00000000..578402b3 --- /dev/null +++ b/micro/test/ccutils/micro/ops/operator.test.cc @@ -0,0 +1,134 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/framework/operator.h" + +#include "micro/framework/scratch_buffer.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/substitute_op.h" + +namespace micro { +namespace framework { + +Operator::~Operator() {} + +#ifndef fake_op_ +#define fake_op_ (reinterpret_cast(op_context_)) +#endif // fake_op_ + +const uint32_t kScratchBufferSize = 100000; +uint8_t kScratchBuffer[kScratchBufferSize] = {0}; +MaceMicroEngineConfig kTmpMicroEngineConfig = { + NULL, // net_def_; + NULL, // model_data_; + NULL, // graph_; + NULL, // op_array_; + NULL, // tensor_mem_; + NULL, // input_buffers_; + NULL, // input_shapes_; + kScratchBuffer, + kScratchBufferSize, +}; + +MaceStatus Operator::Init(MaceMicroEngineConfig *engine_config, + framework::OpContext *op_context, + const model::OperatorDef *op_def) { + engine_config_ = &kTmpMicroEngineConfig; + op_context_ = op_context; + MACE_UNUSED(engine_config); + MACE_UNUSED(op_def_); + MACE_UNUSED(op_def); + + return OnInit(); +} + +MaceStatus Operator::OnInit() { + return MACE_SUCCESS; +} + +MaceStatus Operator::Run() { + MACE_NOT_IMPLEMENTED; + return MACE_SUCCESS; +} + +const model::Argument *Operator::GetArgByName(const char *name) const { + MACE_UNUSED(name); + MACE_ASSERT1(false, "Thsi method should not be invoked."); + return NULL; +} + +uint32_t Operator::GetInputSize() { + return fake_op_->GetInputSize(); +} + +const void *Operator::DoGetInputData(uint32_t idx) { + return fake_op_->DoGetInputData(idx); +} + +uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { + return fake_op_->GetInputShapeDimSize(idx); +} + +const int32_t *Operator::GetInputShapeDims(uint32_t idx) { + return fake_op_->GetInputShapeDims(idx); +} + +uint32_t Operator::GetOutputSize() { + return fake_op_->GetOutputSize(); +} + +void *Operator::DoGetOutputData(uint32_t idx) { + return 
fake_op_->DoGetOutputData(idx); +} + +uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { + return fake_op_->GetOutputShapeDimSize(idx); +} + +const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { + return fake_op_->GetOutputShapeDims(idx); +} + +MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, + const int32_t *dims) { + return fake_op_->ResizeOutputShape(idx, dim_size, dims); +} + +#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC +#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ +template <> \ +T Operator::GetArgByName(const char *name, T default_value) const { \ + return fake_op_->GetArgByName(name, default_value); \ +} +#endif // MACE_DEFINE_GET_ARG_BY_NAME_FUNC + +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(bool, i) +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(int32_t, i) +MACE_DEFINE_GET_ARG_BY_NAME_FUNC(float, f) + +#ifndef MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC +#define MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(T, FUNC) \ +template <> \ +const T *Operator::GetRepeatArgByName(const char *name, \ + uint32_t *size) const { \ + return fake_op_->GetRepeatArgByName(name, size); \ +} +#endif // MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC + +MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(int32_t, ints) +MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(float, floats) +MACE_DEFINE_GET_ARRAY_ARG_BY_NAME_FUNC(uint8_t, s) + +} // namespace framework +} // namespace micro diff --git a/micro/test/ccutils/micro/ops/substitute_op.cc b/micro/test/ccutils/micro/ops/substitute_op.cc new file mode 100644 index 00000000..f65c01ec --- /dev/null +++ b/micro/test/ccutils/micro/ops/substitute_op.cc @@ -0,0 +1,112 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
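// SubstituteOp (implemented below) is the test double behind that harness: it
// only records pointers for inputs and outputs plus small tables of named
// scalar and repeated arguments, and the test build of framework::Operator
// forwards every accessor to it. One detail visible in substitute_op.h:
// scalar arguments are stored in a float field, so integer arguments
// round-trip through float. An illustrative sketch follows;
// ExampleSubstituteOpArgs is a made-up name, while "keepdims" and "axis"
// mirror argument names used by the reduce and squeeze tests above.

#include "micro/ops/substitute_op.h"

void ExampleSubstituteOpArgs() {
  const float in[4] = {1.f, 2.f, 3.f, 4.f};
  const int32_t in_dims[2] = {2, 2};
  float out[4] = {0};
  int32_t out_dims[2] = {0};
  const int32_t axis[1] = {1};

  micro::framework::SubstituteOp op;
  op.AddInput(in, in_dims, 2)
      .AddArg("keepdims", 1)          // stored internally as 1.0f
      .AddRepeatArg("axis", axis, 1)  // stored as pointer + length
      .AddOutput(out, out_dims, 2);

  // Retrieval mirrors the real Operator argument API.
  int32_t keepdims = op.GetArgByName<int32_t>("keepdims", 0);
  uint32_t axis_size = 0;
  const int32_t *axis_ptr = op.GetRepeatArgByName<int32_t>("axis", &axis_size);
  (void)keepdims;
  (void)axis_ptr;
}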
+ +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/include/public/micro.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/substitute_op.h" + +namespace micro { +namespace framework { + +SubstituteOp::SubstituteOp() + : input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {} + +SubstituteOp &SubstituteOp::AddInput( + const void *input, const int32_t *dims, const uint32_t dims_size) { + MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0, + "Invalid param"); + MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem."); + inputs_[input_idx_] = input; + input_dims_[input_idx_] = dims; + input_dim_sizes_[input_idx_] = dims_size; + ++input_idx_; + return *this; +} + +SubstituteOp &SubstituteOp::AddOutput( + void *output, int32_t *dims, const uint32_t dims_size) { + MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0, + "Invalid param"); + MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem."); + outputs_[output_idx_] = output; + output_dims_[output_idx_] = dims; + output_dim_sizes_[output_idx_] = dims_size; + ++output_idx_; + return *this; +} + +uint32_t SubstituteOp::GetInputSize() { + return input_idx_; +} + +const void *SubstituteOp::DoGetInputData(uint32_t idx) { + MACE_ASSERT1(idx < input_idx_, "idx is not valid"); + return inputs_[idx]; +} + +uint32_t SubstituteOp::GetInputShapeDimSize(uint32_t idx) { + MACE_ASSERT1(idx < input_idx_, "idx is not valid"); + return input_dim_sizes_[idx]; +} + +const int32_t *SubstituteOp::GetInputShapeDims(uint32_t idx) { + MACE_ASSERT1(idx < input_idx_, "idx is not valid"); + return input_dims_[idx]; +} + +uint32_t SubstituteOp::GetOutputSize() { + return output_idx_; +} + +void *SubstituteOp::DoGetOutputData(uint32_t idx) { + MACE_ASSERT1(idx < output_idx_, "idx is not valid"); + return outputs_[idx]; +} + +uint32_t SubstituteOp::GetOutputShapeDimSize(uint32_t idx) { + MACE_ASSERT1(idx < output_idx_, "idx is not valid"); + return output_dim_sizes_[idx]; +} + +const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) { + MACE_ASSERT1(idx < output_idx_, "idx is not valid"); + return output_dims_[idx]; +} + +MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx, + uint32_t input_dim_size, + const int32_t *input_dims) { + MACE_ASSERT1(idx < output_idx_, "idx is not valid"); + MACE_ASSERT1(input_dim_size <= output_dim_sizes_[idx], + "Can not support dynamic dim size"); + if (output_dims_[idx] != NULL && input_dim_size > 0) { + base::memcpy(output_dims_[idx], input_dims, + sizeof(int32_t) * input_dim_size); + } + output_dim_sizes_[idx] = input_dim_size; + + return MACE_SUCCESS; +} + +MaceStatus SubstituteOp::ReuseInputBufferForOutput(uint32_t output_idx, + uint32_t input_idx) { + MACE_UNUSED(output_idx); + MACE_UNUSED(input_idx); + return MACE_SUCCESS; +} + +} // namespace framework +} // namespace micro diff --git a/micro/test/ccutils/micro/ops/substitute_op.h b/micro/test/ccutils/micro/ops/substitute_op.h new file mode 100644 index 00000000..0f5e60d4 --- /dev/null +++ b/micro/test/ccutils/micro/ops/substitute_op.h @@ -0,0 +1,140 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_ +#define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_ + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/include/public/micro.h" + +namespace micro { +namespace framework { + +const uint32_t kMaxInputNum = 10; +const uint32_t kMaxOutputNum = 4; +const uint32_t kMaxArgNum = 20; + +struct Arg { + const char *name; + float value; +}; + +struct RepeatArg { + const char *name; + const void *ptr; + uint32_t length; +}; + +class SubstituteOp { + public: + SubstituteOp(); + ~SubstituteOp() {} + + SubstituteOp &AddInput(const void *input, + const int32_t *dims, const uint32_t dims_size); + SubstituteOp &AddOutput(void *output, + int32_t *dims, const uint32_t dims_size); + + template + SubstituteOp &AddArg(const char *name, T value) { + MACE_ASSERT(arg_idx_ < kMaxArgNum); + args_[arg_idx_].name = name; + args_[arg_idx_].value = static_cast(value); + ++arg_idx_; + return *this; + } + + template + SubstituteOp &AddRepeatArg(const char *name, const T *value, uint32_t len) { + MACE_ASSERT(repeat_arg_idx_ < kMaxArgNum); + repeat_args_[repeat_arg_idx_].name = name; + repeat_args_[repeat_arg_idx_].ptr = value; + repeat_args_[repeat_arg_idx_].length = len; + ++repeat_arg_idx_; + return *this; + } + + public: + template + T GetArgByName(const char *name, T default_value) const { + for (uint32_t i = 0; i < arg_idx_; ++i) { + if (base::strcmp(name, args_[i].name) == 0) { + return static_cast(args_[i].value); + } + } + return default_value; + } + + template + const T *GetRepeatArgByName( + const char *name, uint32_t *size = NULL) const { + for (uint32_t i = 0; i < repeat_arg_idx_; ++i) { + if (base::strcmp(name, repeat_args_[i].name) == 0) { + if (size != NULL) { + *size = repeat_args_[i].length; + } + return static_cast(repeat_args_[i].ptr); + } + } + if (size != NULL) { + *size = 0; + } + return NULL; + } + + uint32_t GetInputSize(); + const void *DoGetInputData(uint32_t idx); + uint32_t GetInputShapeDimSize(uint32_t idx); + const int32_t *GetInputShapeDims(uint32_t idx); + uint32_t GetOutputSize(); + void *DoGetOutputData(uint32_t idx); + uint32_t GetOutputShapeDimSize(uint32_t idx); + const int32_t *GetOutputShapeDims(uint32_t idx); + MaceStatus ResizeOutputShape(uint32_t idx, uint32_t input_dim_size, + const int32_t *input_dims); + MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + + template + const T *GetInputData(uint32_t idx) { + return static_cast(DoGetInputData(idx)); + } + + template + T *GetOutputData(uint32_t idx) { + return static_cast(DoGetOutputData(idx)); + } + + private: + const void *inputs_[kMaxInputNum]; + const int32_t *input_dims_[kMaxInputNum]; + uint32_t input_dim_sizes_[kMaxInputNum]; + uint32_t input_idx_; + + void *outputs_[kMaxOutputNum]; + int32_t *output_dims_[kMaxOutputNum]; + uint32_t output_dim_sizes_[kMaxOutputNum]; + uint32_t output_idx_; + + // for arg + Arg args_[kMaxArgNum]; + uint32_t arg_idx_; + RepeatArg repeat_args_[kMaxArgNum]; + uint32_t repeat_arg_idx_; +}; + +} // namespace framework +} // namespace micro + +#endif // 
diff --git a/micro/test/ccutils/micro/ops/test_utils.cc b/micro/test/ccutils/micro/ops/test_utils.cc
new file mode 100644
index 00000000..7cbe5163
--- /dev/null
+++ b/micro/test/ccutils/micro/ops/test_utils.cc
@@ -0,0 +1,74 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "micro/ops/test_utils.h"
+
+namespace micro {
+namespace ops {
+namespace test {
+
+namespace {
+// for FillRandomInput
+const int32_t kRandM = 1 << 20;
+const int32_t kRandA = 9;
+const int32_t kRandB = 7;
+}
+
+void PrintDims(const int32_t *dims, const uint32_t dim_size) {
+  MACE_ASSERT1(dim_size > 0, "invalid dim size");
+  if (dim_size == 1) {
+    LOG(INFO) << "[ " << dims[0] << " ]";
+  } else if (dim_size == 2) {
+    LOG(INFO) << "[ " << dims[0] << ", " << dims[1] << " ]";
+  } else if (dim_size == 3) {
+    LOG(INFO) << "[ " << dims[0] << ", " << dims[1] << ", " << dims[2] << " ]";
+  } else if (dim_size == 4) {
+    LOG(INFO) << "[ " << dims[0] << ", " << dims[1]
+              << ", " << dims[2] << ", " << dims[3] << " ]";
+  } else {
+    for (uint32_t i = 0; i < dim_size; ++i) {
+      LOG(INFO) << dims[i];
+    }
+  }
+}
+
+void AssertSameDims(const int32_t *x_dims, const uint32_t x_dim_size,
+                    const int32_t *y_dims, const uint32_t y_dim_size) {
+  if (x_dim_size != y_dim_size) {
+    LOG(FATAL) << "invalid dim size. x_dim_size = " << x_dim_size
+               << ", y_dim_size = " << y_dim_size;
+  }
+  for (uint32_t i = 0; i < x_dim_size; ++i) {
+    if (x_dims[i] != y_dims[i]) {
+      PrintDims(x_dims, x_dim_size);
+      PrintDims(y_dims, y_dim_size);
+      LOG(FATAL) << "AssertSameDims failed.";
+    }
+  }
+}
+
+void FillRandomInput(void *input, const int32_t shape_size) {
+  uint8_t *mem = static_cast<uint8_t *>(input);
+  mem[0] = port::api::NowMicros() % 256;
+  for (int32_t i = 1; i < shape_size; ++i) {
+    mem[i] = (kRandA * mem[i - 1] + kRandB) % kRandM;
+  }
+}
+
+}  // namespace test
+}  // namespace ops
+}  // namespace micro
+
+
diff --git a/micro/test/ccutils/micro/ops/test_utils.h b/micro/test/ccutils/micro/ops/test_utils.h
new file mode 100644
index 00000000..fc64e0b7
--- /dev/null
+++ b/micro/test/ccutils/micro/ops/test_utils.h
@@ -0,0 +1,46 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
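+// Shared test helpers: dimension printing/assertion and pseudo-random input
+// filling (wrapped below by the MACE_DEFINE_RANDOM_INPUT convenience macro).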
+
+
+#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
+#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
+
+#include "micro/base/logging.h"
+#include "micro/common/global_buffer.h"
+#include "micro/include/public/micro.h"
+#include "micro/port/api.h"
+
+namespace micro {
+namespace ops {
+namespace test {
+
+void PrintDims(const int32_t *dims, const uint32_t dim_size);
+
+void AssertSameDims(const int32_t *x_dims, const uint32_t x_dim_size,
+                    const int32_t *y_dims, const uint32_t y_dim_size);
+
+void FillRandomInput(void *input, const int32_t shape_size);
+
+#ifndef MACE_DEFINE_RANDOM_INPUT
+#define MACE_DEFINE_RANDOM_INPUT(T, input, shape_size) \
+T *input = common::test::GetGlobalBuffer()->GetBuffer<T>(shape_size); \
+micro::ops::test::FillRandomInput(input, shape_size * sizeof(T))
+#endif
+
+}  // namespace test
+}  // namespace ops
+}  // namespace micro
+
+#endif  // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_UTILS_H_
+
diff --git a/micro/test/ccutils/rpc/qaic.sh b/micro/test/ccutils/rpc/qaic.sh
new file mode 100644
index 00000000..0f62fe2d
--- /dev/null
+++ b/micro/test/ccutils/rpc/qaic.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+output_dir=${1}
+mkdir -p ${output_dir}
+echo $HEXAGON_SDK_ROOT/tools/qaic/Ubuntu16/qaic \
+    -mdll -o ${output_dir} \
+    -I$HEXAGON_SDK_ROOT/libs/fastcv/dspCV/android_Debug/ship \
+    -I$HEXAGON_SDK_ROOT/libs/common/rpcmem/android_Debug/ship \
+    -I$HEXAGON_SDK_ROOT/libs/common/adspmsgd/ship/android_Debug \
+    -I$HEXAGON_SDK_ROOT/incs \
+    -I$HEXAGON_SDK_ROOT/libs/common/remote/ship/android_Debug \
+    -I$HEXAGON_SDK_ROOT/incs/stddef \
+    ${@:2}
+$HEXAGON_SDK_ROOT/tools/qaic/Ubuntu16/qaic \
+    -mdll -o ${output_dir} \
+    -I$HEXAGON_SDK_ROOT/libs/fastcv/dspCV/android_Debug/ship \
+    -I$HEXAGON_SDK_ROOT/libs/common/rpcmem/android_Debug/ship \
+    -I$HEXAGON_SDK_ROOT/libs/common/adspmsgd/ship/android_Debug \
+    -I$HEXAGON_SDK_ROOT/incs \
+    -I$HEXAGON_SDK_ROOT/libs/common/remote/ship/android_Debug \
+    -I$HEXAGON_SDK_ROOT/incs/stddef \
+    ${@:2}
diff --git a/micro/test/ccutils/rpc/skel/base_func.cc b/micro/test/ccutils/rpc/skel/base_func.cc
new file mode 100644
index 00000000..21dc98f6
--- /dev/null
+++ b/micro/test/ccutils/rpc/skel/base_func.cc
@@ -0,0 +1,40 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
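+// DSP-side (skel) helper: fills a buffer with a deterministic pseudo-random
+// byte sequence seeded from the DSP clock, mirroring FillRandomInput above.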
+
+
+#include "rpc/skel/base_func.h"
+
+#include
+
+namespace rpc {
+namespace skel {
+
+namespace {
+// for FillRandomValue
+const int32_t kRandM = 1 << 20;
+const int32_t kRandA = 9;
+const int32_t kRandB = 7;
+}
+
+void FillRandomValue(void *buffer, const int32_t buffer_size) {
+  uint8_t *mem = static_cast<uint8_t *>(buffer);
+  mem[0] = HAP_perf_get_time_us() % 256;
+  for (int32_t i = 1; i < buffer_size; ++i) {
+    mem[i] = (kRandA * mem[i - 1] + kRandB) % kRandM;
+  }
+}
+
+}  // namespace skel
+}  // namespace rpc
+
diff --git a/micro/test/ccutils/rpc/skel/base_func.h b/micro/test/ccutils/rpc/skel/base_func.h
new file mode 100644
index 00000000..ea20b53a
--- /dev/null
+++ b/micro/test/ccutils/rpc/skel/base_func.h
@@ -0,0 +1,62 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
+#define MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
+
+#include
+#include
+
+#include "AEEStdErr.h"  // NOLINT
+#include "remote.h"  // NOLINT
+
+#ifndef MACE_DEFINE_RANDOM_INPUT
+#define MACE_DEFINE_RANDOM_INPUT(NAME) \
+static remote_handle64 h##NAME = -1; \
+int NAME##_open(const char *uri, remote_handle64 *h) { \
+  if (h##NAME == -1) { \
+    h##NAME = (remote_handle64)(HAP_perf_get_time_us()); \
+  } \
+  if (h##NAME == NULL) { \
+    h##NAME = -1; \
+    return AEE_ENOMEMORY; \
+  } \
+  *h = h##NAME; \
+  return AEE_SUCCESS; \
+} \
+int NAME##_close(remote_handle64 h) { \
+  if (h != h##NAME) { \
+    return AEE_EBADPARM; \
+  } \
+  if (h##NAME != -1) { \
+  } \
+  h##NAME = -1; \
+  return AEE_SUCCESS; \
+}
+#endif  // MACE_DEFINE_RANDOM_INPUT
+
+#ifdef __cplusplus
+namespace rpc {
+namespace skel {
+#endif  // __cplusplus
+
+void FillRandomValue(void *input, const int32_t shape_size);
+
+#ifdef __cplusplus
+}  // namespace skel
+}  // namespace rpc
+#endif  // __cplusplus
+
+#endif  // MICRO_TEST_CCUTILS_RPC_SKEL_BASE_FUNC_H_
diff --git a/micro/test/ccutils/rpc/stub/base_handle.cc b/micro/test/ccutils/rpc/stub/base_handle.cc
new file mode 100644
index 00000000..e4684b71
--- /dev/null
+++ b/micro/test/ccutils/rpc/stub/base_handle.cc
@@ -0,0 +1,69 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
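+// CPU-side (stub) helper: BaseHandle wraps a FastRPC remote_handle64; Open()
+// calls the generated *_open stub, Close() releases it, and the destructor
+// closes any handle that is still open.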
+ + +#include "rpc/stub/base_handle.h" + +namespace rpc { +namespace stub { + +namespace { +const remote_handle64 IVALID_HANDLE = -1; +} + +BaseHandle::BaseHandle(FuncOpen *func_open, + FuncClose *func_close, + const char *uri) + : func_open_(func_open), + func_close_(func_close), + uri_(uri), + remote_handle_(IVALID_HANDLE) {} + +BaseHandle::~BaseHandle() { + Close(); +} + +bool BaseHandle::Open() { + if (Valid()) { + return true; + } + + int ret = func_open_(uri_, &remote_handle_); + if (ret != 0 || remote_handle_ == IVALID_HANDLE) { + remote_handle_ = IVALID_HANDLE; + return false; + } else { + return true; + } +} + +bool BaseHandle::Close() { + bool status = true; + if (Valid()) { + int ret = func_close_(remote_handle_); + remote_handle_ = IVALID_HANDLE; + if (ret != 0) { + status = false; + } + } + + return status; +} + +bool BaseHandle::Valid() { + return (remote_handle_ != IVALID_HANDLE); +} + +} // namespace stub +} // namespace rpc diff --git a/micro/test/ccutils/rpc/stub/base_handle.h b/micro/test/ccutils/rpc/stub/base_handle.h new file mode 100644 index 00000000..737b9d25 --- /dev/null +++ b/micro/test/ccutils/rpc/stub/base_handle.h @@ -0,0 +1,53 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + +#ifndef MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_ +#define MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_ + +#include + +#include "remote.h" // NOLINT + +namespace rpc { +namespace stub { + +class BaseHandle { + protected: + typedef int FuncOpen(const char *name, remote_handle64 *h); + typedef int FuncClose(remote_handle64 h); + FuncOpen *func_open_; + FuncClose *func_close_; + const char *uri_; + remote_handle64 remote_handle_; + + public: + explicit BaseHandle(FuncOpen *func_open, + FuncClose *func_close, + const char *uri); + + ~BaseHandle(); + + bool Open(); + + bool Close(); + + bool Valid(); +}; + +} // namespace stub +} // namespace rpc + +#endif // MICRO_TEST_CCUTILS_RPC_STUB_BASE_HANDLE_H_ diff --git a/micro/tools/BUILD.bazel b/micro/tools/BUILD.bazel new file mode 100644 index 00000000..5c1bb3ec --- /dev/null +++ b/micro/tools/BUILD.bazel @@ -0,0 +1,20 @@ +licenses(["notice"]) # Apache 2.0 + +cc_binary( + name = "micro_run_static", + srcs = [ + "micro_run.cc", + ], + copts = [ + "-Werror", + "-std=c++11", + "-Wextra", + "-Wno-missing-field-initializers", + ], + linkstatic = 1, + deps = [ + "//external:gflags_nothreads", + "//micro/codegen:generated_models", + "//micro/codegen:micro_engine", + ], +) diff --git a/micro/tools/micro_run.cc b/micro/tools/micro_run.cc new file mode 100644 index 00000000..87a4e6db --- /dev/null +++ b/micro/tools/micro_run.cc @@ -0,0 +1,351 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * Usage:
+ * micro_run --input_node=input_node \
+ *           --output_node=output_node \
+ *           --input_shape=1,224,224,3 \
+ *           --output_shape=1,224,224,2 \
+ *           --input_file=input_data \
+ *           --output_file=micro.out
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gflags/gflags.h"
+#include "micro/base/logging.h"
+#include "micro/include/public/micro.h"
+#include "micro/include/utils/macros.h"
+#include "micro/port/api.h"
+
+#ifndef MICRO_MODEL_NAME
+#error Please specify model name in the command
+#endif
+
+namespace micro {
+namespace MICRO_MODEL_NAME {
+MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
+}
+
+namespace tools {
+std::vector<std::string> Split(const std::string &str, char delims) {
+  std::vector<std::string> result;
+  std::string tmp = str;
+  while (!tmp.empty()) {
+    size_t next_offset = tmp.find(delims);
+    result.push_back(tmp.substr(0, next_offset));
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp = tmp.substr(next_offset + 1);
+    }
+  }
+  return result;
+}
+
+void ParseShape(const std::string &str, std::vector<int32_t> *shape) {
+  std::string tmp = str;
+  while (!tmp.empty()) {
+    int dim = atoi(tmp.data());
+    shape->push_back(dim);
+    size_t next_offset = tmp.find(",");
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp = tmp.substr(next_offset + 1);
+    }
+  }
+}
+
+std::string FormatName(const std::string input) {
+  std::string res = input;
+  for (size_t i = 0; i < input.size(); ++i) {
+    if (!isalnum(res[i])) res[i] = '_';
+  }
+  return res;
+}
+
+DataFormat ParseDataFormat(const std::string &data_format_str) {
+  if (data_format_str == "NHWC") {
+    return DataFormat::NHWC;
+  } else if (data_format_str == "NCHW") {
+    return DataFormat::NCHW;
+  } else if (data_format_str == "OIHW") {
+    return DataFormat::OIHW;
+  } else {
+    return DataFormat::NONE;
+  }
+}
+
+DEFINE_string(model_name, "", "model name in yaml");
+DEFINE_string(input_node, "", "input nodes, separated by comma");
+DEFINE_string(input_shape, "",
+              "input shapes, separated by colon and comma");
+DEFINE_string(output_node, "", "output nodes, separated by comma");
+DEFINE_string(output_shape, "",
+              "output shapes, separated by colon and comma");
+DEFINE_string(input_data_format, "NHWC",
+              "input data formats, NONE|NHWC|NCHW");
+DEFINE_string(output_data_format, "NHWC",
+              "output data formats, NONE|NHWC|NCHW");
+DEFINE_string(input_file, "",
+              "input file name | input file prefix for multiple inputs.");
+DEFINE_string(output_file, "",
+              "output file name | output file prefix for multiple outputs");
+DEFINE_string(input_dir, "", "input directory name");
+DEFINE_string(output_dir, "output", "output directory name");
+
+DEFINE_int32(round, 1, "round");
+DEFINE_int32(restart_round, 1, "restart round");
+DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
+DEFINE_bool(benchmark, false, "enable benchmark op");
+
+void GetOutputAndStoreToFile(MaceMicroEngine *micro_engine,
+                             const std::vector<std::string> &output_names,
+                             const std::string &prefix,
+                             const std::string &suffix) {
+  for (size_t i = 0; i < output_names.size(); ++i) {
+    void *output_buffer = NULL;
+    const int32_t *output_dims = NULL;
+    uint32_t dim_size = 0;
+    MaceStatus status =
+        micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size);
+    MACE_UNUSED(status);
+    MACE_ASSERT1(status == MACE_SUCCESS && output_buffer != NULL,
+                 "GetOutputData failed");
+    std::string output_name = prefix + FormatName(output_names[i]) + suffix;
+    std::ofstream out_file(output_name, std::ios::binary);
+    MACE_ASSERT2(out_file.is_open(), "Open output file failed: ",
+                 strerror(errno));
+    int64_t output_size = std::accumulate(output_dims, output_dims + dim_size,
+                                          sizeof(float),
+                                          std::multiplies<int64_t>());
+    out_file.write(reinterpret_cast<char *>(output_buffer),
+                   output_size);
+    MACE_ASSERT1(!out_file.bad(), "write file failed!");
+    out_file.flush();
+    out_file.close();
+    LOG(INFO) << "Write output file " << output_name.c_str()
+              << " with size " << output_size << " done.";
+  }
+}
+
+bool RunModel(const std::vector<std::string> &input_names,
+              const std::vector<std::vector<int32_t>> &input_shapes,
+              const std::vector<DataFormat> &input_data_formats,
+              const std::vector<std::string> &output_names,
+              const std::vector<DataFormat> &output_data_formats) {
+  // for future
+  MACE_UNUSED(input_data_formats);
+  MACE_UNUSED(output_data_formats);
+
+  int64_t t0 = port::api::NowMicros();
+  MaceMicroEngine *micro_engine = NULL;
+  MaceStatus status = MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine);
+  MACE_UNUSED(status);
+  MACE_ASSERT(status == MACE_SUCCESS && micro_engine != NULL);
+  int64_t t1 = port::api::NowMicros();
+  double init_millis = (t1 - t0) / 1000.0;
+  LOG(INFO) << "Total init latency: "
+            << static_cast<float>(init_millis) << " ms";
+
+  std::vector<std::shared_ptr<char>> inputs;
+  std::vector<int64_t> input_sizes;
+  for (size_t i = 0; i < input_shapes.size(); ++i) {
+    input_sizes.push_back(std::accumulate(input_shapes[i].begin(),
+                                          input_shapes[i].end(), sizeof(float),
+                                          std::multiplies<int64_t>()));
+    inputs.push_back(std::shared_ptr<char>(new char[input_sizes[i]],
+                                           std::default_delete<char[]>()));
+  }
+
+  if (!FLAGS_input_dir.empty()) {
+    DIR *dir_parent;
+    struct dirent *entry;
+    dir_parent = opendir(FLAGS_input_dir.c_str());
+    if (dir_parent == NULL) {
+      LOG(FATAL) << "Open input_dir " << FLAGS_input_dir.c_str()
+                 << " failed: " << strerror(errno);
+    }
+    while ((entry = readdir(dir_parent))) {
+      std::string file_name = std::string(entry->d_name);
+      std::string prefix = FormatName(input_names[0]);
+      if (file_name.find(prefix) == 0) {
+        std::string suffix = file_name.substr(prefix.size());
+
+        for (size_t i = 0; i < input_names.size(); ++i) {
+          file_name = FLAGS_input_dir + "/" + FormatName(input_names[i]) +
+              suffix;
+          std::ifstream in_file(file_name, std::ios::in | std::ios::binary);
+          LOG(INFO) << "Read " << file_name.c_str();
+          MACE_ASSERT2(in_file.is_open(), "Open input file failed: ",
+                       strerror(errno));
+          in_file.read(inputs[i].get(), input_sizes[i]);
+          in_file.close();
+          micro_engine->RegisterInputData(i, inputs[i].get(),
+                                          input_shapes[i].data());
+        }
+        status = micro_engine->Run();
+        MACE_ASSERT(status == MACE_SUCCESS);
+
+        if (!FLAGS_output_dir.empty()) {
+          GetOutputAndStoreToFile(micro_engine, output_names,
+                                  FLAGS_output_dir + "/", suffix);
+        }
+      }
+    }
+
+    closedir(dir_parent);
+  } else {
+    for (size_t i = 0; i < input_names.size(); ++i) {
+      // load input
+      std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
+                            std::ios::in | std::ios::binary);
+      if (in_file.is_open()) {
+        in_file.read(inputs[i].get(), input_sizes[i]);
+        in_file.close();
+      } else {
+        LOG(INFO) << "Open input file failed";
+        return false;
+      }
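+      // Hand the loaded buffer and its shape to the engine before running.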
+      micro_engine->RegisterInputData(i, inputs[i].get(),
+                                      input_shapes[i].data());
+    }
+
+    LOG(INFO) << "Warm up run";
+    int64_t t3 = port::api::NowMicros();
+    status = micro_engine->Run();
+    MACE_ASSERT1(status == MACE_SUCCESS, "run micro engine failed");
+    int64_t t4 = port::api::NowMicros();
+    double warmup_millis = (t4 - t3) / 1000.0;
+    LOG(INFO) << "1st warm up run latency: "
+              << static_cast<float>(warmup_millis) << " ms";
+
+    double model_run_millis = -1;
+    if (FLAGS_round > 0) {
+      LOG(INFO) << "Run model";
+      int64_t total_run_duration = 0;
+      for (int i = 0; i < FLAGS_round; ++i) {
+        int64_t t0 = port::api::NowMicros();
+        // TODO(luxuhui): add metadata to benchmark op
+        status = micro_engine->Run();
+        MACE_ASSERT(status == MACE_SUCCESS);
+        int64_t t1 = port::api::NowMicros();
+        total_run_duration += (t1 - t0);
+      }
+      model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
+      LOG(INFO) << "Average latency: "
+                << static_cast<float>(model_run_millis) << " ms";
+    }
+    GetOutputAndStoreToFile(micro_engine, output_names,
+                            FLAGS_output_file + "_", "");
+
+    // Metrics reporting tools depend on this format; keep it consistent.
+    printf("=============================================\n");
+    printf("---- init warmup run_avg \n");
+    printf("=============================================\n");
+    printf("time %11.3f %11.3f %11.3f\n",
+           init_millis, warmup_millis, model_run_millis);
+  }
+
+  return true;
+}
+
+int Main(int argc, char **argv) {
+  std::string usage = "MACE micro run model tool, please specify proper"
+                      " arguments.\nusage: " + std::string(argv[0]) + " --help";
+  gflags::SetUsageMessage(usage);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  std::vector<std::string> input_names = Split(FLAGS_input_node, ',');
+  std::vector<std::string> output_names = Split(FLAGS_output_node, ',');
+  if (input_names.empty() || output_names.empty()) {
+    LOG(INFO) << gflags::ProgramUsage();
+    return 0;
+  }
+
+  LOG(INFO) << "model name: " << FLAGS_model_name.c_str();
+  LOG(INFO) << "input node: " << FLAGS_input_node.c_str();
+  LOG(INFO) << "input shape: " << FLAGS_input_shape.c_str();
+  LOG(INFO) << "output node: " << FLAGS_output_node.c_str();
+  LOG(INFO) << "output shape: " << FLAGS_output_shape.c_str();
+  LOG(INFO) << "input_file: " << FLAGS_input_file.c_str();
+  LOG(INFO) << "output_file: " << FLAGS_output_file.c_str();
+  LOG(INFO) << "input dir: " << FLAGS_input_dir.c_str();
+  LOG(INFO) << "output dir: " << FLAGS_output_dir.c_str();
+  LOG(INFO) << "round: " << FLAGS_round;
+  LOG(INFO) << "restart_round: " << FLAGS_restart_round;
+
+  std::vector<std::string> input_shapes = Split(FLAGS_input_shape, ':');
+  std::vector<std::string> output_shapes = Split(FLAGS_output_shape, ':');
+
+  const size_t input_count = input_shapes.size();
+  const size_t output_count = output_shapes.size();
+  std::vector<std::vector<int32_t>> input_shape_vec(input_count);
+  std::vector<std::vector<int32_t>> output_shape_vec(output_count);
+  for (size_t i = 0; i < input_count; ++i) {
+    ParseShape(input_shapes[i], &input_shape_vec[i]);
+  }
+  for (size_t i = 0; i < output_count; ++i) {
+    ParseShape(output_shapes[i], &output_shape_vec[i]);
+  }
+  if (input_names.size() != input_shape_vec.size()
+      || output_names.size() != output_shape_vec.size()) {
+    LOG(INFO) << "inputs' names do not match inputs' shapes "
+                 "or outputs' names do not match outputs' shapes";
+    return 0;
+  }
+  std::vector<std::string> raw_input_data_formats =
+      Split(FLAGS_input_data_format, ',');
+  std::vector<std::string> raw_output_data_formats =
+      Split(FLAGS_output_data_format, ',');
+  std::vector<DataFormat> input_data_formats(input_count);
+  std::vector<DataFormat> output_data_formats(output_count);
+  for
(size_t i = 0; i < input_count; ++i) { + input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]); + } + for (size_t i = 0; i < output_count; ++i) { + output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]); + } + bool ret = false; + for (int i = 0; i < FLAGS_restart_round; ++i) { + LOG(INFO) << "restart round " << i; + + ret = RunModel(input_names, input_shape_vec, input_data_formats, + output_names, output_data_formats); + } + if (ret) { + return 0; + } + return -1; +} + +} // namespace tools +} // namespace micro + +int main(int argc, char **argv) { + micro::tools::Main(argc, argv); +} diff --git a/test/ccbenchmark/mace/benchmark_utils/test_benchmark.h b/test/ccbenchmark/mace/benchmark_utils/test_benchmark.h index 12c7672b..810555e1 100644 --- a/test/ccbenchmark/mace/benchmark_utils/test_benchmark.h +++ b/test/ccbenchmark/mace/benchmark_utils/test_benchmark.h @@ -29,7 +29,7 @@ namespace testing { class Benchmark { public: - Benchmark(const char *name, void (*benchmark_func)(int)); + Benchmark(const char *name, void (*benchmark_func)(int32_t)); static void Run(const char *pattern); diff --git a/third_party/hexagon/hexagon_sdk.BUILD b/third_party/hexagon/hexagon_sdk.BUILD new file mode 100644 index 00000000..30bc49b6 --- /dev/null +++ b/third_party/hexagon/hexagon_sdk.BUILD @@ -0,0 +1,58 @@ +package(default_visibility = ['//visibility:public']) + +filegroup( + name = 'sdk_location', + srcs = ["readme.txt"], +) + +cc_library( + name = 'headers_incs', + hdrs = glob([ + "incs/*.h", + ]), + strip_include_prefix = "incs/", +) + +cc_library( + name = 'headers_incs_stddef', + hdrs = glob([ + "incs/stddef/*.h", + ]), + strip_include_prefix = "incs/stddef/", +) + +cc_library( + name = 'headers_dsp', + hdrs = glob([ + "libs/common/remote/ship/hexagon_Release_toolv81_v60/*.h", + ]), + strip_include_prefix = "libs/common/remote/ship/hexagon_Release_toolv81_v60/", + deps = [ + ":headers_incs", + ":headers_incs_stddef", + "@hexagon_tools//:headers_tools_target", + ], +) + +cc_library( + name = 'headers_arm', + hdrs = glob([ + "libs/common/remote/ship/android_Release_aarch64/*.h", + ]), + strip_include_prefix = "libs/common/remote/ship/android_Release_aarch64/", + deps = [ + ":headers_incs", + ":headers_incs_stddef", + ], +) + +cc_library( + name = 'sdk_arm', + srcs = glob([ + "libs/common/remote/ship/android_Release_aarch64/libcdsprpc.so", + "libs/common/rpcmem/rpcmem.a", + ]), + deps = [ + ":headers_arm", + ], +) \ No newline at end of file diff --git a/third_party/hexagon/hexagon_tools.BUILD b/third_party/hexagon/hexagon_tools.BUILD new file mode 100644 index 00000000..ca4f798f --- /dev/null +++ b/third_party/hexagon/hexagon_tools.BUILD @@ -0,0 +1,88 @@ +package(default_visibility = ['//visibility:public']) + +cc_library( + name = "headers_tools_target", + hdrs = glob([ + "target/hexagon/include/**/*.h", + ]), + strip_include_prefix = "target/hexagon/include/", +) + +filegroup( + name = 'gcc', + srcs = [ + 'bin/hexagon-clang', + ], +) + +filegroup( + name = 'ar', + srcs = [ + 'bin/hexagon-ar', + ], +) + +filegroup( + name = 'ld', + srcs = [ + 'bin/hexagon-link', + ], +) + +filegroup( + name = 'nm', + srcs = [ + 'bin/hexagon-nm', + ], +) + +filegroup( + name = 'objcopy', + srcs = [ + 'bin/hexagon-elfcopy', + ], +) + +filegroup( + name = 'objdump', + srcs = [ + 'bin/hexagon-llvm-objdump', + ], +) + +filegroup( + name = 'strip', + srcs = [ + 'bin/hexagon-strip', + ], +) + +filegroup( + name = 'as', + srcs = [ + 'bin/hexagon-llvm-mc', + ], +) + +filegroup( + name = 
"compiler_pieces", + srcs = glob([ + "libexec/**", + "lib/**", + "include/**", + ]), +) + +filegroup( + name = "compiler_components", + srcs = [ + ":ar", + ":as", + ":gcc", + ":ld", + ":nm", + ":objcopy", + ":objdump", + ":strip", + ], +) diff --git a/tools/bazel.rc b/tools/bazel.rc index 7081296c..27aa8695 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -128,3 +128,22 @@ build:ubsan --copt -O0 build:ubsan --copt -fno-omit-frame-pointer build:ubsan --linkopt -fsanitize=undefined build:ubsan --linkopt -lubsan + +# Usage example: bazel build --config hexagon_qualcomm +build:hexagon_qualcomm --crosstool_top=//tools/hexagon_compiler:toolchain +build:hexagon_qualcomm --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:hexagon_qualcomm --cpu=hexagon +build:hexagon_qualcomm --copt=-std=c++98 +build:hexagon_qualcomm --copt -Wno-ignored-attributes +build:hexagon_qualcomm --copt -Wno-unused-function +build:hexagon_qualcomm --copt -Wno-sequence-point +build:hexagon_qualcomm --copt -Wno-implicit-fallthrough +build:hexagon_qualcomm --copt -Wno-missing-braces +build:hexagon_qualcomm --copt -mv60 +build:hexagon_qualcomm --copt -Wno-cast-align +build:hexagon_qualcomm --copt -Wpointer-arith +build:hexagon_qualcomm --copt -Wnested-externs +build:hexagon_qualcomm --copt -Wno-pointer-to-int-cast +build:hexagon_qualcomm --copt -Wno-int-to-pointer-cast +build:hexagon_qualcomm --copt -fno-rtti +build:hexagon_qualcomm --copt -fno-exceptions diff --git a/tools/converter.py b/tools/converter.py index ed0a0169..32490794 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -85,6 +85,7 @@ InOutDataType = Enum('InputDataType', FPDataTypeStrs = [ "fp16_fp32", "fp32_fp32", + "bf16_fp32", ] FPDataType = Enum('GPUDataType', [(ele, ele) for ele in FPDataTypeStrs], @@ -278,8 +279,10 @@ def get_model_files(model_config, model_output_dir): model_file_path = model_config[YAMLKeyword.model_file_path] model_sha256_checksum = model_config[YAMLKeyword.model_sha256_checksum] weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") - weight_sha256_checksum = model_config.get(YAMLKeyword.weight_sha256_checksum, "") # noqa - quantize_range_file_path = model_config.get(YAMLKeyword.quantize_range_file, "") # noqa + weight_sha256_checksum = \ + model_config.get(YAMLKeyword.weight_sha256_checksum, "") + quantize_range_file_path = \ + model_config.get(YAMLKeyword.quantize_range_file, "") model_file = model_file_path weight_file = weight_file_path quantize_range_file = quantize_range_file_path @@ -808,7 +811,12 @@ def convert_func(flags): else: model_graph_format = configs.get(YAMLKeyword.model_graph_format, "file") - if model_graph_format == ModelFormat.code: + embed_graph_def = model_graph_format == ModelFormat.code + if flags.enable_micro: + mace_check((not embed_model_data) and (not embed_graph_def), + ModuleName.YAML_CONFIG, + "You should specify file mode to convert micro model.") + if embed_graph_def: os.makedirs(model_header_dir) sh_commands.gen_mace_engine_factory_source( configs[YAMLKeyword.models].keys(), @@ -816,9 +824,16 @@ def convert_func(flags): sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) - convert.convert(configs, MODEL_CODEGEN_DIR) + convert.convert(configs, MODEL_CODEGEN_DIR, flags.enable_micro) for model_name, model_config in configs[YAMLKeyword.models].items(): + if flags.enable_micro: + data_type = model_config.get(YAMLKeyword.data_type, "") + mace_check(data_type == FPDataType.fp32_fp32.value or + data_type == FPDataType.bf16_fp32.value, + 
ModuleName.YAML_CONFIG, + "You should specify fp32_fp32 or bf16_fp32 data type " + "to convert micro model.") model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name) encrypt.encrypt(model_name, "%s/model/%s.pb" % (model_codegen_dir, model_name), @@ -837,6 +852,9 @@ def convert_func(flags): sh.mv("-f", '%s/model/%s.data' % (model_codegen_dir, model_name), model_output_dir) + if flags.enable_micro: + sh.mv("-f", '%s/model/%s_micro.tar.gz' % + (model_codegen_dir, model_name), model_output_dir) else: if not embed_model_data: sh.mv("-f", @@ -1031,6 +1049,10 @@ def parse_args(): 'convert', parents=[all_type_parent_parser, convert_run_parent_parser], help='convert to mace model (file or code)') + convert.add_argument( + "--enable_micro", + action="store_true", + help="enable convert micro.") convert.set_defaults(func=convert_func) run = subparsers.add_parser( diff --git a/tools/cpplint.sh b/tools/cpplint.sh index ef8e1c3c..93e275a0 100755 --- a/tools/cpplint.sh +++ b/tools/cpplint.sh @@ -7,3 +7,5 @@ cpplint --linelength=80 --counting=detailed --root=include $(find include -name cpplint --linelength=80 --counting=detailed --root=test/ccutils $(find test/ccutils -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccunit $(find test/ccunit -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccbenchmark $(find test/ccbenchmark -name "*.h" -or -name "*.cc") + +cpplint --linelength=80 --counting=detailed $(find ./micro -path ./micro/codegen -prune -o -name "*.h" -or -name "*.cc") diff --git a/tools/hexagon_compiler/BUILD.bazel b/tools/hexagon_compiler/BUILD.bazel new file mode 100644 index 00000000..f51d7f6d --- /dev/null +++ b/tools/hexagon_compiler/BUILD.bazel @@ -0,0 +1,53 @@ +cc_toolchain_suite( + name = "toolchain", + toolchains = { + "hexagon|gcc": "cc-compiler-hexagon", + }, +) + +filegroup( + name = "hexagon_all_files", + srcs = [ + "//tools/hexagon_compiler/hexagon_gcc:tool-wrappers", + "@hexagon_tools//:compiler_pieces", + ], +) + +filegroup( + name = "hexagon_linker_files", + srcs = [ + "//tools/hexagon_compiler/hexagon_gcc:ar", + "//tools/hexagon_compiler/hexagon_gcc:gcc", + "//tools/hexagon_compiler/hexagon_gcc:ld", + "@hexagon_tools//:compiler_pieces", + ], +) + +filegroup( + name = "hexagon_compiler_files", + srcs = [ + "//tools/hexagon_compiler/hexagon_gcc:as", + "//tools/hexagon_compiler/hexagon_gcc:gcc", + "//tools/hexagon_compiler/hexagon_gcc:ld", + ], +) + +filegroup( + name = "empty", + srcs = [], +) + +cc_toolchain( + name = "cc-compiler-hexagon", + all_files = ":hexagon_all_files", + compiler_files = ":hexagon_compiler_files", + cpu = "hexagon", + dwp_files = ":empty", + dynamic_runtime_libs = [":empty"], + linker_files = ":hexagon_linker_files", + objcopy_files = "//tools/hexagon_compiler/hexagon_gcc:objcopy", + static_runtime_libs = [":empty"], + strip_files = "//tools/hexagon_compiler/hexagon_gcc:strip", + supports_param_files = 1, + visibility = ["//visibility:public"], +) diff --git a/tools/hexagon_compiler/CROSSTOOL b/tools/hexagon_compiler/CROSSTOOL new file mode 100644 index 00000000..e9eb0a0b --- /dev/null +++ b/tools/hexagon_compiler/CROSSTOOL @@ -0,0 +1,95 @@ +major_version: "local" +minor_version: "" +default_target_cpu: "hexagon" + +default_toolchain { + cpu: "hexagon" + toolchain_identifier: "hexagon-qualcomm" +} + +toolchain { + abi_version: "gcc" + abi_libc_version: "" + builtin_sysroot: "" + compiler: "gcc" + host_system_name: "hexagon" + needsPic: true + supports_incremental_linker: false 
+ supports_fission: false + supports_interface_shared_objects: false + #supports_normalizing_ar: true + supports_start_end_lib: false + supports_thin_archives: true + target_libc: "" + target_cpu: "hexagon" + target_system_name: "" + toolchain_identifier: "hexagon-qualcomm" + + tool_path { name: "ar" path: "hexagon_gcc/hexagon-qualcomm-ar" } + tool_path { name: "compat-ld" path: "hexagon_gcc/hexagon-qualcomm-ld" } + tool_path { name: "cpp" path: "hexagon_gcc/hexagon-qualcomm-gcc" } + tool_path { name: "dwp" path: "hexagon_gcc/hexagon-qualcomm-dwp" } + tool_path { name: "gcc" path: "hexagon_gcc/hexagon-qualcomm-gcc" } + tool_path { name: "gcov" path: "hexagon_gcc/hexagon-qualcomm-gcov" } + # C(++) compiles invoke the compiler (as that is the one knowing where + # to find libraries), but we provide LD so other rules can invoke the linker. + tool_path { name: "ld" path: "hexagon_gcc/hexagon-qualcomm-ld" } + tool_path { name: "nm" path: "hexagon_gcc/hexagon-qualcomm-nm" } + tool_path { name: "objcopy" path: "hexagon_gcc/hexagon-qualcomm-objcopy" } + objcopy_embed_flag: "-I" + objcopy_embed_flag: "binary" + tool_path { name: "objdump" path: "hexagon_gcc/hexagon-qualcomm-objdump" } + tool_path { name: "strip" path: "hexagon_gcc/hexagon-qualcomm-strip" } + + linker_flag: "-Wl" + linker_flag: "-lm" + + # Anticipated future default. + # This makes GCC and Clang do what we want when called through symlinks. + unfiltered_cxx_flag: "-no-canonical-prefixes" + linker_flag: "-no-canonical-prefixes" + + + # Security hardening on by default. + # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. + # We need to undef it before redefining it as some distributions now have + # it enabled by default. + compiler_flag: "-fstack-protector" + compiler_flag: "-fPIE" + linker_flag: "-v" + linker_flag: "-pie" + + # for hexagon + linker_flag: "-march=hexagon" + linker_flag: "-mcpu=hexagonv60" + linker_flag: "-shared" + linker_flag: "-G0" + linker_flag: "-fPIC" + + + + # Enable coloring even if there's no attached terminal. Bazel removes the + # escape sequences if --nocolor is specified. + compiler_flag: "-fdiagnostics-color=always" + + # All warnings are enabled. Maybe enable -Werror as well? + compiler_flag: "-Wall" + + + compilation_mode_flags { + mode: OPT + # No debug symbols. + # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or + # even generally? However, that can't happen here, as it requires special + # handling in Bazel. + compiler_flag: "-g0" + + # Conservative choice for -O + # -O3 can increase binary size and even slow down the resulting binaries. + # Profile first and / or use FDO if you need better performance than this. 
+ compiler_flag: "-O2" + + compiler_flag: "-ffunction-sections" + linker_flag: "-Wl" + } +} diff --git a/tools/hexagon_compiler/hexagon_gcc/BUILD.bazel b/tools/hexagon_compiler/hexagon_gcc/BUILD.bazel new file mode 100644 index 00000000..258aa185 --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/BUILD.bazel @@ -0,0 +1,84 @@ +package(default_visibility = ["//tools/hexagon_compiler:__pkg__"]) + +filegroup( + name = "srcs", + srcs = glob(["**"]), +) + +filegroup( + name = "gcc", + srcs = [ + "hexagon-qualcomm-gcc", + "@hexagon_tools//:gcc", + ], +) + +filegroup( + name = "ar", + srcs = [ + "hexagon-qualcomm-ar", + "@hexagon_tools//:ar", + ], +) + +filegroup( + name = "ld", + srcs = [ + "hexagon-qualcomm-ld", + "@hexagon_tools//:ld", + ], +) + +filegroup( + name = "nm", + srcs = [ + "hexagon-qualcomm-nm", + "@hexagon_tools//:nm", + ], +) + +filegroup( + name = "objcopy", + srcs = [ + "hexagon-qualcomm-objcopy", + "@hexagon_tools//:objcopy", + ], +) + +filegroup( + name = "objdump", + srcs = [ + "hexagon-qualcomm-objdump", + "@hexagon_tools//:objdump", + ], +) + +filegroup( + name = "strip", + srcs = [ + "hexagon-qualcomm-strip", + "@hexagon_tools//:strip", + ], +) + +filegroup( + name = "as", + srcs = [ + "hexagon-qualcomm-as", + "@hexagon_tools//:as", + ], +) + +filegroup( + name = "tool-wrappers", + srcs = [ + ":ar", + ":as", + ":gcc", + ":ld", + ":nm", + ":objcopy", + ":objdump", + ":strip", + ], +) diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ar b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ar new file mode 100755 index 00000000..85d42ba4 --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ar @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-ar \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-ar \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-as b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-as new file mode 100755 index 00000000..c440ab96 --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-as @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-as \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-llvm-mc \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcc b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcc new file mode 100755 index 00000000..61f59a3e --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcc @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-clang \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcov b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcov new file mode 100755 index 00000000..154ff154 --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-gcov @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-gcov \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-coverage \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ld b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ld new file mode 100755 index 00000000..d16b728b --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-ld @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-ld \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-link \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-nm b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-nm new file mode 100755 index 00000000..9bd18b0f --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-nm @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-nm \ + 
${HL_HEXAGON_TOOLS}/bin/hexagon-nm \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objcopy b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objcopy new file mode 100755 index 00000000..8198d14c --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objcopy @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-objcopy \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-elfcopy \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objdump b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objdump new file mode 100755 index 00000000..6664aec4 --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-objdump @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-objdump \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-llvm-objdump \ + "$@" diff --git a/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-strip b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-strip new file mode 100755 index 00000000..0d53341b --- /dev/null +++ b/tools/hexagon_compiler/hexagon_gcc/hexagon-qualcomm-strip @@ -0,0 +1,5 @@ +#!/bin/bash --norc + +exec -a hexagon-qualcomm-strip \ + ${HL_HEXAGON_TOOLS}/bin/hexagon-strip \ + "$@" diff --git a/tools/python/convert.py b/tools/python/convert.py index d367b63c..2f342ce2 100644 --- a/tools/python/convert.py +++ b/tools/python/convert.py @@ -20,10 +20,9 @@ from __future__ import division from __future__ import print_function import argparse +import copy import sys -import numpy as np -import shutil -import tempfile +from micro_converter import MicroConverter from utils import config_parser from utils.config_parser import DataFormat from utils.config_parser import DeviceType @@ -32,7 +31,7 @@ from utils import util from utils.util import mace_check from utils.config_parser import normalize_model_config from utils.config_parser import ModelKeys -from py_proto import mace_pb2 +from utils.convert_util import merge_params from transform import base_converter as cvt from transform import transformer from visualize import visualize_model @@ -45,7 +44,7 @@ def transpose_shape(shape, dst_order): return t_shape -def convert(conf, output): +def convert(conf, output, enable_micro=False): if ModelKeys.quantize_stat in conf: quantize_stat = conf[ModelKeys.quantize_stat] else: @@ -88,7 +87,12 @@ def convert(conf, output): model, params = merge_params(mace_model, model_conf[ModelKeys.data_type]) - + if enable_micro: + micro_converter = MicroConverter(model_conf, copy.deepcopy(model), + copy.deepcopy(params), model_name) + micro_converter.gen_code() + micro_converter.package(model_output + "/" + + model_name + "_micro.tar.gz") output_model_file = model_output + "/" + model_name + ".pb" output_params_file = model_output + "/" + model_name + ".data" with open(output_model_file, "wb") as f: @@ -206,61 +210,6 @@ def convert_model(conf, quantize_stat): return output_graph_def -def merge_params(net_def, data_type): - def tensor_to_bytes(tensor): - if tensor.data_type == mace_pb2.DT_HALF: - data = bytearray( - np.array(tensor.float_data).astype(np.float16).tobytes()) - tensor.data_size = len(tensor.float_data) - elif tensor.data_type == mace_pb2.DT_FLOAT: - data = bytearray( - np.array(tensor.float_data).astype(np.float32).tobytes()) - tensor.data_size = len(tensor.float_data) - elif tensor.data_type == mace_pb2.DT_INT32: - data = bytearray( - np.array(tensor.int32_data).astype(np.int32).tobytes()) - tensor.data_size = len(tensor.int32_data) - elif tensor.data_type == mace_pb2.DT_UINT8: - data = bytearray( - 
np.array(tensor.int32_data).astype(np.uint8).tolist()) - tensor.data_size = len(tensor.int32_data) - elif tensor.data_type == mace_pb2.DT_FLOAT16: - data = bytearray( - np.array(tensor.float_data).astype(np.float16).tobytes()) - tensor.data_size = len(tensor.float_data) - else: - raise Exception('Tensor data type %s not supported' % - tensor.data_type) - return data - - model_data = [] - offset = 0 - for tensor in net_def.tensors: - if tensor.data_type == mace_pb2.DT_FLOAT: - tensor.data_type = data_type - raw_data = tensor_to_bytes(tensor) - if tensor.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0: - padding = 4 - offset % 4 - model_data.extend(bytearray([0] * padding)) - offset += padding - - tensor.offset = offset - model_data.extend(raw_data) - offset += len(raw_data) - - for tensor in net_def.tensors: - if tensor.data_type == mace_pb2.DT_FLOAT \ - or tensor.data_type == mace_pb2.DT_HALF \ - or tensor.data_type == mace_pb2.DT_FLOAT16: - del tensor.float_data[:] - elif tensor.data_type == mace_pb2.DT_INT32: - del tensor.int32_data[:] - elif tensor.data_type == mace_pb2.DT_UINT8: - del tensor.int32_data[:] - - return net_def, model_data - - def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( diff --git a/tools/python/layers_validate.py b/tools/python/layers_validate.py new file mode 100644 index 00000000..2aab64af --- /dev/null +++ b/tools/python/layers_validate.py @@ -0,0 +1,235 @@ +# Copyright 2018 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
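+# Splits a converted model into one sub-model per layer (one .pb per op) and
+# writes an outputs.yml index, so each layer's output can be validated alone.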
+ +import argparse +import copy +import os +import sh +import yaml + +from py_proto import mace_pb2 +from transform.base_converter import ConverterUtil +from transform.base_converter import MaceKeyword +from transform.base_converter import MaceOp +from transform.hexagon_converter import HexagonOp +from utils.util import mace_check + + +def normalize_op_name(name): + return name.replace('/', '_').replace(':', '_') + + +def handle_index(start, end, layers): + num_layers = end - start + 1 + if ':' in layers: + start_index, end_index = layers.split(':') + start_index = int(start_index) if start_index else 0 + end_index = int(end_index) if end_index else num_layers - 1 + else: + start_index = int(layers) + end_index = start_index + 1 + if start_index < 0: + start_index += num_layers + if end_index < 0: + end_index += num_layers + start_index += start + end_index += start + start_index = max(start, min(end - 1, start_index)) + end_index = max(start + 1, min(end, end_index)) + + return start_index, end_index + + +def save_model_to_proto(net_def, model_name, output_dir): + output_path = output_dir + "/" + model_name + ".pb" + with open(output_path, "wb") as f: + f.write(net_def.SerializeToString()) + with open(output_path + "_txt", "w") as f: + f.write(str(net_def)) + return output_path + + +def convert(model_file, output_dir, layers): + mace_check(os.path.isfile(model_file), + "Input graph file '" + model_file + "' does not exist!") + mace_check(os.path.isdir(output_dir), + "Output directory '" + output_dir + "' does not exist!") + net_def = mace_pb2.NetDef() + with open(model_file, "rb") as f: + net_def.ParseFromString(f.read()) + + is_quantize = ConverterUtil.get_arg( + net_def, MaceKeyword.mace_quantize_flag_arg_str) + is_quantize = False if is_quantize is None else is_quantize.i == 1 + is_hexagon = False + index = 0 + end_index = len(net_def.op) + if is_quantize: + while index < end_index: + # omit op quantize + if net_def.op[index].type == MaceOp.Quantize.name or \ + net_def.op[index].type == \ + HexagonOp.QuantizeINPUT_f_to_8.name or \ + net_def.op[index].type == HexagonOp.INPUT.name: + index += 1 + # omit op dequantize + elif net_def.op[end_index - 1].type == MaceOp.Dequantize.name or \ + net_def.op[end_index - 1].type == \ + HexagonOp.DequantizeOUTPUT_8tof.name or \ + net_def.op[end_index - 1].type == HexagonOp.OUTPUT.name: + + end_index -= 1 + else: + break + mace_check(0 < index < end_index < len(net_def.op), + "Wrong number of op quantize(%d) or dequantize(%d)." 
% + (index, len(net_def.op) - end_index)) + if net_def.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name or \ + net_def.op[-1].type == HexagonOp.OUTPUT.name: + is_hexagon = True + + index, end_index = handle_index(index, end_index, layers) + + data_format = net_def.output_info[0].data_format + output_configs = {"subgraphs": []} + while index < end_index: + # omit BatchToSpaceND and op before that due to changed graph + if net_def.op[index].type == MaceOp.BatchToSpaceND.name or \ + net_def.op[index].type == HexagonOp.BatchToSpaceND_8.name or \ + (index + 1 < end_index and + (net_def.op[index + 1].type == MaceOp.BatchToSpaceND.name or + net_def.op[index + 1].type == HexagonOp.BatchToSpaceND_8.name)): # noqa + index += 1 + continue + net = copy.deepcopy(net_def) + if is_hexagon: + # reuse dequantize op and it's min/max tensor's node_id + del net.op[index+1:-1] + else: + del net.op[index+1:] + del net.output_info[:] + op = net.op[index] + index += 1 + + output_tensors = [] + output_shapes = [] + op_name = op.name + if is_quantize: + op.name = MaceKeyword.mace_output_node_name + '_' + op.name + if is_hexagon: + mace_check(len(op.output) == 1, + "Only supports number of outputs of Hexagon op be 1.") + for i in range(len(op.output)): + output_tensors.append(str(op.output[i])) + output_shapes.append( + ",".join([str(dim) for dim in op.output_shape[i].dims])) + # modify output info + output_info = net.output_info.add() + output_info.name = op.output[i] + output_info.data_format = data_format + output_info.dims.extend(op.output_shape[i].dims) + output_info.data_type = mace_pb2.DT_FLOAT + if is_quantize: + output_info.scale = op.quantize_info[0].scale + output_info.zero_point = op.quantize_info[0].zero_point + # modify output op + if is_quantize: + output_name = op.output[i] + new_output_name = \ + MaceKeyword.mace_output_node_name + '_' + op.output[i] + op.output[i] = new_output_name + if not is_hexagon: + dequantize_op = net.op.add() + dequantize_op.name = normalize_op_name(output_name) + dequantize_op.type = MaceOp.Dequantize.name + dequantize_op.input.append(new_output_name) + dequantize_op.output.append(output_name) + output_shape = dequantize_op.output_shape.add() + output_shape.dims.extend(op.output_shape[i].dims) + dequantize_op.output_type.append(mace_pb2.DT_FLOAT) + ConverterUtil.add_data_type_arg(dequantize_op, + mace_pb2.DT_UINT8) + else: + dequantize_op = net.op[-1] + dequantize_op.name = normalize_op_name(output_name) + del dequantize_op.input[:] + del dequantize_op.output[:] + dequantize_op.input.append(new_output_name) + dequantize_op.node_input[0].node_id = op.node_id + dequantize_op.output.append(output_name) + if dequantize_op.type == \ + HexagonOp.DequantizeOUTPUT_8tof.name: + input_min = new_output_name[:-1] + '1' + input_max = new_output_name[:-1] + '2' + dequantize_op.input.extend([input_min, input_max]) + dequantize_op.node_input[1].node_id = op.node_id + dequantize_op.node_input[2].node_id = op.node_id + del dequantize_op.node_input[3:] + else: + del dequantize_op.node_input[1:] + + model_path = save_model_to_proto(net, normalize_op_name(op_name), + output_dir) + output_config = {"model_file_path": str(model_path), + "output_tensors": output_tensors, + "output_shapes": output_shapes} + output_configs["subgraphs"].append(output_config) + + output_configs_path = output_dir + "outputs.yml" + with open(output_configs_path, "w") as f: + yaml.dump(output_configs, f, default_flow_style=False) + + +def get_layers(model_dir, model_name, layers): + model_file = "%s/%s.pb" % 
(model_dir, model_name) + output_dir = "%s/output_models/" % model_dir + if os.path.exists(output_dir): + sh.rm('-rf', output_dir) + os.makedirs(output_dir) + + convert(model_file, output_dir, layers) + + output_configs_path = output_dir + "outputs.yml" + with open(output_configs_path) as f: + output_configs = yaml.load(f) + output_configs = output_configs['subgraphs'] + + return output_configs + + +def parse_args(): + """Parses command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_file", + type=str, + default="", + help="pb file to load.") + parser.add_argument( + "--output_dir", + type=str, + default="", + help="Directory to save the output graph to.") + parser.add_argument( + "--layers", + type=str, + default="-1", + help="'start_layer:end_layer' or 'layer', similar to python slice." + " Use with --validate flag.") + return parser.parse_known_args() + + +if __name__ == '__main__': + FLAGS, _ = parse_args() + convert(FLAGS.model_file, FLAGS.output_dir, FLAGS.layers) diff --git a/tools/python/micro/__init__.py b/tools/python/micro/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/python/micro/graph_builder.py b/tools/python/micro/graph_builder.py new file mode 100644 index 00000000..f6f5b8b1 --- /dev/null +++ b/tools/python/micro/graph_builder.py @@ -0,0 +1,98 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
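+# Builds the micro Graph proto from a NetDef: each op is mapped to its resolver
+# index, and every op input is packed into a uint32 as
+# (producer_op_idx << 16) | output_idx, with 0xffff0000 | i marking const
+# tensor i and 0xfffe0000 | i marking model input i.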
+ +from py_proto import micro_mem_pb2 +from utils.util import mace_check + + +class GraphBuilder: + def __init__(self, pb_model, op_resolver): + self.net_def = pb_model + self.ops_desc_map = op_resolver.get_op_desc_map_from_model() + self.op_resolver = op_resolver + + self.init_output_cache() + self.init_const_tensor_cache() + self.init_model_input_cache() + + def get_op_idx(self, op_def): + if op_def.type not in self.ops_desc_map: + return -1 + op_desc_list = self.ops_desc_map[op_def.type] + for op_desc in op_desc_list: + if self.op_resolver.op_def_desc_matched(op_def, op_desc): + return op_desc.idx + return -1 + + def init_output_cache(self): + model_outputs = [] + for output_info in self.net_def.output_info: + model_outputs.append(output_info.name) + self.output_cache = {} + self.output_infos = [] + for i in range(len(self.net_def.op)): + op_def = self.net_def.op[i] + for k in range(len(op_def.output)): + tensor_name = op_def.output[k] + output_info_uint = ((i & 0x0000ffff) << 16) | (k & 0x0000ffff) + if tensor_name in model_outputs: + self.output_infos.append(output_info_uint) + else: + self.output_cache[tensor_name] = output_info_uint + + def init_const_tensor_cache(self): + self.const_tensor_cache = {} + for i in range(len(self.net_def.tensors)): + const_tensor = self.net_def.tensors[i] + self.const_tensor_cache[const_tensor.name] = \ + (0xffff0000 | (i & 0x0000ffff)) + + def init_model_input_cache(self): + self.model_input_cache = {} + for i in range(len(self.net_def.input_info)): + input_info = self.net_def.input_info[i] + self.model_input_cache[input_info.name] = \ + (0xfffe0000 | (i & 0x0000ffff)) + + def build(self): + graph = micro_mem_pb2.Graph() + graph.output_infos.extend(self.output_infos) + for op_def in self.net_def.op: + op_context = graph.op_contexts.add() + idx = self.get_op_idx(op_def) + mace_check(idx >= 0, "Error from the OpResolver.") + op_context.op_idx = idx + + op_with_model_input = False + for input in op_def.input: + input_info = 0 + if input in self.output_cache: + input_info = self.output_cache[input] + elif input in self.const_tensor_cache: + input_info = self.const_tensor_cache[input] + elif input in self.model_input_cache: + input_info = self.model_input_cache[input] + op_with_model_input = True + else: + mace_check(False, + "Model error: can not find input(%s)" % input) + op_context.input_infos.append(input_info) + if op_with_model_input: + graph.input_op_idxs.append(idx) + + for output_shape in op_def.output_shape: + resize_shape = op_context.output_resize_shapes.add() + for dim in output_shape.dims: + resize_shape.dims.append(dim) + return graph diff --git a/tools/python/micro/jinja2_files/micro_engine_c_interface.cc.jinja2 b/tools/python/micro/jinja2_files/micro_engine_c_interface.cc.jinja2 new file mode 100644 index 00000000..453f4a14 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_engine_c_interface.cc.jinja2 @@ -0,0 +1,61 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This is a generated file. DO NOT EDIT! + +#include "micro/codegen/engines/{{model_tag}}/micro_engine_c_interface.h" + +#include "micro/codegen/engines/{{model_tag}}/micro_engine_factory.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef micro::MaceMicroEngine MaceMicroEngine; +typedef micro::MaceStatus MaceStatus; + +void *{{model_tag}}_GetMaceMicroEngineHandle() { + MaceMicroEngine *micro_engine = NULL; + micro::{{model_tag}}::GetMicroEngineSingleton(&micro_engine); + return micro_engine; +} + +bool {{model_tag}}_RegisterInputData(void *handle, uint32_t idx, + const void *input_buffer, + const int32_t *input_dims) { + MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle); + MaceStatus status = + micro_engine->RegisterInputData(idx, input_buffer, input_dims); + return (status == micro::MACE_SUCCESS); +} + +bool {{model_tag}}_Interpret(void *handle) { + MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle); + MaceStatus status = micro_engine->Run(); + return (status == micro::MACE_SUCCESS); +} + +bool {{model_tag}}_GetInterpretResult(void *handle, const uint32_t idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size) { + MaceMicroEngine *micro_engine = static_cast<MaceMicroEngine *>(handle); + MaceStatus status = micro_engine->GetOutputData( + idx, output_data, output_dims, output_dim_size); + return (status == micro::MACE_SUCCESS); +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/tools/python/micro/jinja2_files/micro_engine_c_interface.h.jinja2 b/tools/python/micro/jinja2_files/micro_engine_c_interface.h.jinja2 new file mode 100644 index 00000000..a2f440cd --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_engine_c_interface.h.jinja2 @@ -0,0 +1,40 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void *{{model_tag}}_GetMaceMicroEngineHandle(); + +bool {{model_tag}}_RegisterInputData(void *handle, uint32_t idx, + const void *input_buffer, + const int32_t *input_dims); + +bool {{model_tag}}_Interpret(void *handle); + +bool {{model_tag}}_GetInterpretResult(void *handle, const uint32_t idx, + void **output_data, + const int32_t **output_dims, + uint32_t *output_dim_size); + +#ifdef __cplusplus +} +#endif + + diff --git a/tools/python/micro/jinja2_files/micro_engine_config.cc.jinja2 b/tools/python/micro/jinja2_files/micro_engine_config.cc.jinja2 new file mode 100644 index 00000000..c5f4d753 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_engine_config.cc.jinja2 @@ -0,0 +1,58 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include <stdint.h> + +#include "micro/framework/graph.h" +#include "micro/include/public/micro.h" +#include "micro/model/net_def.h" + +#include "micro/codegen/models/{{model_tag}}/micro_graph_data.h" +#include "micro/codegen/models/{{model_tag}}/micro_model_data.h" +#include "micro/codegen/models/{{model_tag}}/micro_net_def_data.h" +#include "micro/codegen/models/{{model_tag}}/micro_ops_list.h" + + + +namespace micro { +namespace {{model_tag}} { + +namespace { + uint8_t kTensorMem[{{ embed_data.tensor_mem_size }}] = {0}; + uint8_t kScratchBuffer[{{ embed_data.scratch_buffer_size }}] = {0}; + const void *kInputBuffers[{{ embed_data.input_size }}] = {NULL}; + const int32_t *kInputShapes[{{ embed_data.input_size }}] = {NULL}; + + MaceMicroEngineConfig kMicroEngineConfig = { + reinterpret_cast<model::NetDef *>(kNetDef), + kModelData, + reinterpret_cast<framework::Graph *>(kGraphData), + kOpsArray, + kTensorMem, + kInputBuffers, + kInputShapes, + kScratchBuffer, + {{ embed_data.scratch_buffer_size }} + }; +} + +MaceMicroEngineConfig *GetMicroEngineConfig() { + return &kMicroEngineConfig; +} + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/jinja2_files/micro_engine_factory.cc.jinja2 b/tools/python/micro/jinja2_files/micro_engine_factory.cc.jinja2 new file mode 100644 index 00000000..c0c5c71a --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_engine_factory.cc.jinja2 @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include "micro/codegen/engines/{{model_tag}}/micro_engine_factory.h" + + +namespace micro { +namespace {{model_tag}} { + +namespace { +MaceMicroEngine kMaceMicroEngine; +bool kHasInit = false; +} + +extern MaceMicroEngineConfig *GetMicroEngineConfig(); + +MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine) { + MaceStatus status = MACE_SUCCESS; + if (!kHasInit) { + MaceMicroEngineConfig *engine_config = GetMicroEngineConfig(); + status = kMaceMicroEngine.Init(engine_config); + } + if (status == MACE_SUCCESS) { + *engine = &kMaceMicroEngine; + } + return status; +} + +} // namespace {{model_tag}} +} // namespace micro diff --git a/tools/python/micro/jinja2_files/micro_engine_factory.h.jinja2 b/tools/python/micro/jinja2_files/micro_engine_factory.h.jinja2 new file mode 100644 index 00000000..72c428a5 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_engine_factory.h.jinja2 @@ -0,0 +1,27 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include "micro/include/public/micro.h" + + +namespace micro { +namespace {{model_tag}} { + +MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 new file mode 100644 index 00000000..2b0e6c8c --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 @@ -0,0 +1,28 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include + +namespace micro { +namespace {{model_tag}} { + +uint8_t kGraphData[{{ data_size }}] = { + {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} +}; + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 new file mode 100644 index 00000000..4b664b39 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 @@ -0,0 +1,28 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include + +namespace micro { +namespace {{model_tag}} { + +const uint8_t kModelData[{{ data_size }}] = { + {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} +}; + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 new file mode 100644 index 00000000..5380ca11 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 @@ -0,0 +1,28 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include + +namespace micro { +namespace {{model_tag}} { + +uint8_t kNetDef[{{ data_size }}] = { + {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} +}; + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/jinja2_files/micro_ops_list.h.jinja2 b/tools/python/micro/jinja2_files/micro_ops_list.h.jinja2 new file mode 100644 index 00000000..f4504314 --- /dev/null +++ b/tools/python/micro/jinja2_files/micro_ops_list.h.jinja2 @@ -0,0 +1,38 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +{% for op_src_path in op_src_path_list %} +{{ "#include \"%s\"" % op_src_path }} +{%endfor%} + +namespace micro { +namespace {{model_tag}} { + +namespace { +{% for i in range(0, op_class_name_list_size) %} + {{ "ops::%s op%s;" % (op_class_name_list[i], i) }} +{%endfor%} +} // namespace + +framework::Operator *kOpsArray[{{ data_size }}] = { +{% for i in range(0, op_class_name_list_size) %} + {{ "&op%s," % i }} +{%endfor%} +}; + +} // namespace {{model_tag}} +} // namespace micro + diff --git a/tools/python/micro/mem_computer.py b/tools/python/micro/mem_computer.py new file mode 100644 index 00000000..0fa0b9e1 --- /dev/null +++ b/tools/python/micro/mem_computer.py @@ -0,0 +1,138 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
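mem_computer.py below plans one flat tensor arena: ops are visited in execution order, each output takes the smallest free block that fits (the free list is kept sorted by size), otherwise the arena grows, and a block is released once the last consumer of its tensor has run; the chosen offset is stored in op.mem_id. A condensed, simplified sketch of that first-fit strategy (not the real class; it ignores data types and assumes every listed input is an intermediate tensor):

    # Simplified sketch of the reuse strategy implemented by MemComputer below.
    def plan(tensor_sizes, num_consumers, schedule):
        # schedule: [(input_names, output_name)] in execution order
        free_blocks, blocks, offsets, arena = [], {}, {}, 0
        refs = dict(num_consumers)
        for inputs, output in schedule:
            need = tensor_sizes[output]
            block = next((b for b in free_blocks if b["size"] >= need), None)
            if block is not None:
                free_blocks.remove(block)        # reuse a freed block that is big enough
            else:
                block = {"offset": arena, "size": need}
                arena += need                    # grow the arena
            blocks[output] = block
            offsets[output] = block["offset"]    # what op.mem_id records
            for name in inputs:
                refs[name] -= 1
                if refs[name] == 0:              # last consumer ran: recycle the block
                    free_blocks.append(blocks[name])
                    free_blocks.sort(key=lambda b: b["size"])
        return offsets, arena

    sizes = {"a": 64, "b": 64, "c": 32}
    print(plan(sizes, {"a": 1, "b": 1}, [([], "a"), (["a"], "b"), (["b"], "c")]))
    # ({'a': 0, 'b': 64, 'c': 0}, 128): "c" reuses the block freed by "a"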
+ +from utils.convert_util import data_type_to_np_dt +from utils.util import mace_check + +import numpy as np + + +class MemBlock: + def __init__(self, tensor_name, offset, size): + self.tensor_name = tensor_name + self.offset = offset + self.size = size + + +class MemComputer: + def __init__(self, net_def, np_data_type): + self.net_def = net_def + self.np_data_type = np_data_type + self.const_tensor_names = [] + for const_tensor in net_def.tensors: + self.const_tensor_names.append(const_tensor.name) + self.input_names = [] + for input_info in net_def.input_info: + self.input_names.append(input_info.name) + + def init_computer(self): + self.free_mem_list = [] + self.used_mem_list = [] + self.buffer_size = 0 + self.ref_counts = {} + for op in self.net_def.op: + for tensor_name in op.input: + if tensor_name in self.const_tensor_names or \ + tensor_name in self.input_names: + continue + if tensor_name not in self.ref_counts: + self.ref_counts[tensor_name] = 0 + self.ref_counts[tensor_name] += 1 + + def get_mem_size(self, op, output_shape): + np_data_type = self.np_data_type + if len(op.output_type) > 0: + np_data_type = \ + data_type_to_np_dt(op.output_type[0], self.np_data_type) + data_type_bytes = np.dtype(np_data_type).itemsize + if op.type == 'WinogradTransform' or op.type == 'GEMM': + mace_check(len(output_shape) == 4, + "WinogradTransform and GEMM only support 4-dim") + mem_size = output_shape[2] * output_shape[3] * output_shape[0] \ + * int((output_shape[1] + 3) / 4) * 4 + else: + dim_size = len(output_shape) + if dim_size > 0: + mem_size = int((output_shape[dim_size - 1] + 3) / 4) * 4 + for i in range(dim_size - 1): + mem_size *= output_shape[i] + else: + print("the op %s's output dim size is 0" % op.type) + mem_size = 0 + return mem_size * data_type_bytes + + def remove_mem_block_by_name(self, mem_list, tensor_name): + return_mem_block = None + for mem_block in mem_list: + if tensor_name == mem_block.tensor_name: + return_mem_block = mem_block + mem_list.remove(mem_block) + break + return return_mem_block + + def fake_new(self, op): + output_size = len(op.output) + for i in range(output_size): + mem_size = self.get_mem_size(op, op.output_shape[i].dims) + final_mem_block = None + reused = False + for mem_block in self.free_mem_list: + if mem_block.size >= mem_size: + mem_block.tensor_name = op.output[i] + final_mem_block = mem_block + self.free_mem_list.remove(mem_block) + mace_check(final_mem_block is not None, + "Error: final_mem_block should not be None") + reused = True + # print("reuse a tensor mem: %s -> %s" % + # (mem_size, mem_block.size)) + break + if not reused: + final_mem_block = MemBlock(op.output[i], self.buffer_size, + mem_size) + self.buffer_size += mem_size + # print("new a tensor mem: %s" % final_mem_block.size) + + # for micro, mem_id is mem_offset + op.mem_id.append(final_mem_block.offset) + self.used_mem_list.append(final_mem_block) + + def fake_delete(self, op): + for tensor_name in op.input: + if tensor_name in self.const_tensor_names or \ + tensor_name in self.input_names: + continue + mace_check(tensor_name in self.ref_counts and + self.ref_counts[tensor_name] > 0, + "Invalid: ref_count is 0.") + self.ref_counts[tensor_name] -= 1 + if self.ref_counts[tensor_name] is 0: + mem_block = self.remove_mem_block_by_name( + self.used_mem_list, tensor_name) + mace_check(mem_block is not None, + "error, can not find tensor: %s" % tensor_name) + self.free_mem_list.append(mem_block) + self.free_mem_list.sort(key=lambda mem_block: mem_block.size) + + def 
fake_execute_op(self, op): + for i in range(len(op.output)): + self.fake_new(op) + self.fake_delete(op) + + # return the tensor memory size needed by mace micro + def compute(self): + self.init_computer() + for op in self.net_def.op: + self.fake_execute_op(op) + return self.buffer_size diff --git a/tools/python/micro/micro_codegen.py b/tools/python/micro/micro_codegen.py new file mode 100644 index 00000000..4646b62d --- /dev/null +++ b/tools/python/micro/micro_codegen.py @@ -0,0 +1,103 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import numpy as np +import os + +from jinja2 import Environment, FileSystemLoader + +JINJA2_DIR = './jinja2_files/' + + +class MicroCodeGen: + def __init__(self): + pass + + def gen_micro_ops_list_from_bytes(self, model_tag, op_src_path_list, + op_class_name_list, + jinja_file_name, output_path): + cwd = os.path.dirname(__file__) + j2_env = Environment( + loader=FileSystemLoader(cwd), trim_blocks=True) + + template_name = JINJA2_DIR + jinja_file_name + source = j2_env.get_template(template_name).render( + model_tag=model_tag, + op_src_path_list=op_src_path_list, + op_class_name_list=op_class_name_list, + op_class_name_list_size=len(op_class_name_list) + ) + with open(output_path, "w") as f: + f.write(source) + + def gen_micro_source_from_bytes(self, model_tag, embed_data, + jinja_file_name, output_path): + cwd = os.path.dirname(__file__) + j2_env = Environment( + loader=FileSystemLoader(cwd), trim_blocks=True) + + template_name = JINJA2_DIR + jinja_file_name + source = j2_env.get_template(template_name).render( + model_tag=model_tag, + embed_data=embed_data, + data_size=len(embed_data), + ) + with open(output_path, "w") as f: + f.write(source) + + def gen_net_def_data(self, model_tag, model_def_data, output_path): + embed_data = np.frombuffer(model_def_data, dtype=np.uint8) + self.gen_micro_source_from_bytes( + model_tag, embed_data, 'micro_net_def.h.jinja2', output_path) + + def gen_graph_data(self, model_tag, graph_data, output_path): + embed_data = np.frombuffer(graph_data, dtype=np.uint8) + self.gen_micro_source_from_bytes(model_tag, embed_data, + 'micro_graph_data.h.jinja2', + output_path) + + def gen_ops_data(self, model_tag, op_src_path_list, + op_class_name_list, output_path): + self.gen_micro_ops_list_from_bytes(model_tag, op_src_path_list, + op_class_name_list, + 'micro_ops_list.h.jinja2', + output_path) + + def gen_engin_config(self, model_tag, config_data, output_path): + self.gen_micro_source_from_bytes(model_tag, config_data, + 'micro_engine_config.cc.jinja2', + output_path) + + def gen_model_data(self, model_tag, model_param_data, output_path): + embed_data = np.frombuffer(model_param_data, dtype=np.uint8) + self.gen_micro_source_from_bytes(model_tag, embed_data, + 'micro_model_data.h.jinja2', + output_path) + + def gen_engine_factory(self, model_tag, output_path_h, output_path_cc): + self.gen_micro_source_from_bytes(model_tag, '', + 'micro_engine_factory.h.jinja2', + output_path_h) + 
self.gen_micro_source_from_bytes(model_tag, '', + 'micro_engine_factory.cc.jinja2', + output_path_cc) + + def gen_engine_c_interface(self, model_tag, output_path_h, output_path_cc): + self.gen_micro_source_from_bytes(model_tag, '', + 'micro_engine_c_interface.h.jinja2', + output_path_h) + self.gen_micro_source_from_bytes(model_tag, '', + 'micro_engine_c_interface.cc.jinja2', + output_path_cc) diff --git a/tools/python/micro/micro_io_converter.py b/tools/python/micro/micro_io_converter.py new file mode 100644 index 00000000..dc053eb8 --- /dev/null +++ b/tools/python/micro/micro_io_converter.py @@ -0,0 +1,99 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from py_proto import mace_pb2 +from transform.base_converter import MaceOp +from utils.util import mace_check +import copy + + +class MicroIoConverter: + @staticmethod + def add_dt_cast_for_bf16(net_def): + bf16_net_def = copy.deepcopy(net_def) + op_num = len(bf16_net_def.op) + for i in range(op_num): + bf16_net_def.op.pop() + model_input = {} + for input_info in net_def.input_info: + model_input[input_info.name] = input_info.dims + model_output = {} + for output_info in net_def.output_info: + model_output[output_info.name] = output_info.dims + for op_def in net_def.op: + op_added = False + if len(model_input) > 0: + for i in range(len(op_def.input)): + input_name = op_def.input[i] + if input_name in model_input: + if op_added: + next_op = bf16_net_def.op.pop() + else: + next_op = copy.deepcopy(op_def) + op_added = True + + op_cast = bf16_net_def.op.add() + op_cast.name = MaceOp.Cast.name + "_op_" + input_name + op_cast.type = MaceOp.Cast.name + op_cast.input.append(input_name) + trans_output_name = \ + MaceOp.Cast.name + "_out_" + input_name + op_cast.output.append(trans_output_name) + data_type_arg = op_cast.arg.add() + data_type_arg.name = 'T' + data_type_arg.i = mace_pb2.DT_FLOAT + op_cast.output_type.append(mace_pb2.DT_BFLOAT16) + output_shape = op_cast.output_shape.add() + output_shape.dims.extend(model_input[input_name]) + + next_op.input[i] = trans_output_name + bf16_net_def.op.append(next_op) + model_input.pop(input_name) + if len(model_output) > 0: + mace_check(len(op_def.output) == 1, + "Not support output num > 1") + output_name = op_def.output[0] + if output_name in model_output: + if not op_added: + last_op = copy.deepcopy(op_def) + op_added = True + else: + last_op = bf16_net_def.op.pop() + last_op.output[0] = output_name + "_" + MaceOp.Cast.name + bf16_net_def.op.append(last_op) + + op_cast = bf16_net_def.op.add() + op_cast.name = MaceOp.Cast.name + "_op_" + output_name + op_cast.type = MaceOp.Cast.name + op_cast.input.append(last_op.output[0]) + op_cast.output.append(output_name) + data_type_arg = op_cast.arg.add() + data_type_arg.name = 'T' + data_type_arg.i = mace_pb2.DT_BFLOAT16 + op_cast.output_type.append(mace_pb2.DT_FLOAT) + output_shape = op_cast.output_shape.add() + output_shape.dims.extend(model_output[output_name]) + model_output.pop(output_name) + if not 
op_added: + bf16_net_def.op.append(copy.deepcopy(op_def)) + return bf16_net_def + + @staticmethod + def convert(net_def, data_type): + if data_type == mace_pb2.DT_BFLOAT16: + print("data type is bfloat16, add input/output layers") + return MicroIoConverter.add_dt_cast_for_bf16(net_def) + else: + print("data type is %s" % data_type) + return net_def diff --git a/tools/python/micro/micro_op_converter.py b/tools/python/micro/micro_op_converter.py new file mode 100644 index 00000000..a2c691ee --- /dev/null +++ b/tools/python/micro/micro_op_converter.py @@ -0,0 +1,58 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from transform.base_converter import ConverterUtil +from transform.base_converter import DataFormat +from transform.base_converter import MaceKeyword +from transform.base_converter import MaceOp +from utils.util import mace_check +import numpy as np + + +class MicroOpConverter: + def __init__(self, pb_model, model_weights, data_type=np.float32): + self.net_def = pb_model + self.model_weights = model_weights + self.weight_bytes = bytearray(model_weights) + self.data_type = data_type + self._consts = {} + for tensor in self.net_def.tensors: + self._consts[tensor.name] = tensor + + def convert_filters_format(self): + arg_format = ConverterUtil.get_arg(self.net_def, + MaceKeyword.mace_filter_format_str) + mace_check(arg_format.i == DataFormat.OIHW.value, "Invalid model") + arg_format.i = DataFormat.OHWI.value + + transposed_filter = set() + for op in self.net_def.op: + # OIHW => OHWI + if (op.type == MaceOp.Conv2D.name or + op.type == MaceOp.DepthwiseConv2d.name) and \ + op.input[1] not in transposed_filter: + print("transform filter: %s" % op.type) + filter = self._consts[op.input[1]] + tensor_data = np.frombuffer(self.weight_bytes, self.data_type, + filter.data_size, filter.offset) + filter_data = np.array(tensor_data).reshape(filter.dims) \ + .transpose(0, 2, 3, 1) + filter_bytes = np.array(filter_data).tobytes() + slice_end = filter.offset + len(filter_bytes) + self.model_weights[filter.offset: slice_end] = filter_bytes + filter.dims[:] = filter_data.shape + transposed_filter.add(op.input[1]) + + def convert_op_params(self): + self.convert_filters_format() diff --git a/tools/python/micro/micro_support_ops.py b/tools/python/micro/micro_support_ops.py new file mode 100644 index 00000000..33e471bb --- /dev/null +++ b/tools/python/micro/micro_support_ops.py @@ -0,0 +1,281 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
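micro_support_ops.py below maps every op in the model to an entry of McSupportedOps; the optional tag picks a specialized kernel (Conv2dC4S4Op, DepthwiseConv2dKB4S4Op, PoolingS4Op and so on) from the output and filter shapes, with both factors capped at 4. A condensed restatement of the Conv2D branch of OpResolver.get_op_tag:

    # Condensed restatement of the Conv2D case in OpResolver.get_op_tag below.
    def conv2d_tag(output_shape):                 # NHWC output shape
        size = min(output_shape[0] * output_shape[1] * output_shape[2], 4)
        channel = min(output_shape[3], 4)
        if channel >= 2 and size >= 4:
            return "c%ss%s" % (channel, size)     # e.g. 'c4s4' selects Conv2dC4S4Op
        return None                               # no tag: fall back to Conv2dRefOp

    print(conv2d_tag([1, 112, 112, 32]))          # c4s4
    print(conv2d_tag([1, 1, 1, 8]))               # None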
+# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum +from py_proto import mace_pb2 +from transform.base_converter import MaceKeyword +from transform.base_converter import MaceOp +from utils.config_parser import DataFormat +from utils.config_parser import ModelKeys +from utils.config_parser import Platform +from utils.util import mace_check + +import copy + + +class OpDescriptor: + def __init__(self, src_path, class_name, type, + data_type, data_format, tag=None): + self.src_path = src_path + self.class_name = class_name + self.type = type + self.data_type = data_type + self.data_format = data_format + self.tag = tag + self.name = None + self.idx = -1 + + +McSupportedOps = [ + OpDescriptor('micro/ops/argmax.h', 'ArgMaxOp', MaceOp.ArgMax.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/conv_2d_ref.h', 'Conv2dRefOp', + MaceOp.Conv2D.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, None), + OpDescriptor('micro/ops/nhwc/conv_2d_c4_s4.h', 'Conv2dC4S4Op', + MaceOp.Conv2D.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'c4s4'), + OpDescriptor('micro/ops/nhwc/conv_2d_c3_s4.h', 'Conv2dC3S4Op', + MaceOp.Conv2D.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'c3s4'), + OpDescriptor('micro/ops/nhwc/conv_2d_c2_s4.h', 'Conv2dC2S4Op', + MaceOp.Conv2D.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'c2s4'), + OpDescriptor('micro/ops/cast.h', 'CastOp', + MaceOp.Cast.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/pooling_ref.h', 'PoolingRefOp', + MaceOp.Pooling.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/pooling_s4.h', 'PoolingS4Op', + MaceOp.Pooling.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, "s4"), + OpDescriptor('micro/ops/squeeze.h', 'SqueezeOp', MaceOp.Squeeze.name, + mace_pb2.DT_FLOAT, None), + OpDescriptor('micro/ops/softmax.h', 'SoftmaxOp', MaceOp.Softmax.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp', + MaceOp.Eltwise.name, mace_pb2.DT_FLOAT, None), + OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp', + MaceOp.Eltwise.name, mace_pb2.DT_INT32, None), + OpDescriptor('micro/ops/activation.h', 'ActivationOp', + MaceOp.Activation.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp', + MaceOp.StridedSlice.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC), + OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp', + MaceOp.StridedSlice.name, mace_pb2.DT_INT32, + DataFormat.NHWC), + OpDescriptor('micro/ops/reduce.h', 'ReduceOp', MaceOp.Reduce.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/reduce.h', 'ReduceOp', MaceOp.Reduce.name, + mace_pb2.DT_INT32, DataFormat.NHWC), + OpDescriptor('micro/ops/stack.h', 'StackOp', MaceOp.Stack.name, + mace_pb2.DT_FLOAT, None), + OpDescriptor('micro/ops/stack.h', 'StackOp', MaceOp.Stack.name, + mace_pb2.DT_INT32, None), + OpDescriptor('micro/ops/bias_add.h', 'BiasAddOp', MaceOp.BiasAdd.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/matmul.h', 'MatMulOp', MaceOp.MatMul.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/batch_norm.h', 'BatchNormOp', + MaceOp.BatchNorm.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC), + OpDescriptor('micro/ops/shape.h', 'ShapeOp', MaceOp.Shape.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', MaceOp.Reshape.name, + mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/expand_dims.h', 
'ExpandDimsOp', + MaceOp.ExpandDims.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_ref.h', + 'DepthwiseConv2dRefOp', + MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb4_s4.h', + 'DepthwiseConv2dKB4S4Op', + MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'kb4s4'), + OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb3_s4.h', + 'DepthwiseConv2dKB3S4Op', + MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'kb3s4'), + OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb2_s4.h', + 'DepthwiseConv2dKB2S4Op', + MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'kb2s4'), + OpDescriptor('micro/ops/nhwc/depthwise_conv_2d_kb1_s4.h', + 'DepthwiseConv2dKB1S4Op', + MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, + DataFormat.NHWC, 'kb1s4'), +] + + +class OpResolver: + def __init__(self, pb_model, model_conf): + self.net_def = pb_model + self.op_desc_map = {} + self.op_desc_list = [] + if model_conf[ModelKeys.platform] == Platform.TENSORFLOW: + self.default_data_format = DataFormat.NHWC + else: + self.default_data_format = DataFormat.NCHW + print("OpResolver set default_data_format: %s" % + self.default_data_format) + if ModelKeys.quantize in model_conf and \ + model_conf[ModelKeys.quantize] == 1: + self.default_data_type = mace_pb2.DT_UINT8 + else: + self.default_data_type = \ + model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) + + def get_op_data_format(self, op_def): + arg = self.get_op_def_arg(op_def, MaceKeyword.mace_data_format_str) + if arg is None or arg.i == DataFormat.AUTO.value: + return self.default_data_format + else: + return DataFormat(arg.i) + + def get_op_data_type(self, op_def): + arg = self.get_op_def_arg(op_def, MaceKeyword.mace_op_data_type_str) + if arg is None: + return self.default_data_type + else: + return arg.i + + def get_op_def_arg(self, op_def, name): + for arg in op_def.arg: + if arg.name == name: + return arg + return None + + def get_op_def_input_dims(self, op_def, idx): + input_name = op_def.input[idx] + for const_tensor in self.net_def.tensors: + if input_name == const_tensor.name: + return const_tensor.dims + for pre_op in self.net_def.op: + for i in range(len(pre_op.output)): + if input_name == pre_op.output[i]: + return pre_op.output_shape[i].dims + return None + + def get_op_tag(self, op_def): + if op_def.type == MaceOp.Conv2D.name: + output_shape = op_def.output_shape[0].dims + size = output_shape[0] * output_shape[1] * output_shape[2] + if size >= 4: + size = 4 + channel = output_shape[3] + if channel >= 4: + channel = 4 + if channel >= 2 and size >= 4: + return ("c%ss%s" % (channel, size)) + elif op_def.type == MaceOp.DepthwiseConv2d.name: + output_shape = op_def.output_shape[0].dims + size = output_shape[0] * output_shape[1] * output_shape[2] + if size >= 4: + size = 4 + filter_dims = self.get_op_def_input_dims(op_def, 1) + mace_check(filter_dims is not None, "Get filter dims failed.") + k_batch = filter_dims[0] + if k_batch >= 4: + k_batch = 4 + if size >= 4: + return ("kb%ss%s" % (k_batch, size)) + elif op_def.type == MaceOp.Pooling.name: + kernels = self.get_op_def_arg(op_def, MaceKeyword.mace_kernel_str) + mace_check(kernels is not None, "Get kernels failed.") + size = kernels.ints[0] * kernels.ints[1] + if size >= 4: + return "s4" + return None + + def op_def_desc_type_matched(self, op_def, op_desc): + data_format_match = op_desc.data_format is None or \ + op_desc.data_format == \ + 
self.get_op_data_format(op_def) + if not data_format_match: + return False + op_data_type = self.get_op_data_type(op_def) + data_type_match = \ + op_desc.data_type is None or \ + op_desc.data_type == op_data_type or \ + (op_desc.data_type == mace_pb2.DT_FLOAT and + (op_data_type == mace_pb2.DT_HALF or + op_data_type == mace_pb2.DT_FLOAT16 or + op_data_type == mace_pb2.DT_BFLOAT16)) + if not data_type_match: + return False + op_tag = self.get_op_tag(op_def) + if op_tag != op_desc.tag: + return False + return True + + def op_def_desc_matched(self, op_def, op_desc): + if not self.op_def_desc_type_matched(op_def, op_desc): + return False + return op_def.name == op_desc.name + + def find_op_in_desc_map(self, op_def, op_desc_map): + if op_def.type not in op_desc_map: + return None + op_descs = op_desc_map[op_def.type] + for op_desc in op_descs: + if self.op_def_desc_type_matched(op_def, op_desc): + return op_desc + print("The op %s's data type can not be found in op_desc_map" % + op_def.type) + return None + + def get_op_desc_map_from_model(self): + if len(self.op_desc_map) > 0: + return self.op_desc_map + op_desc_raw_map = {} + for i in range(len(McSupportedOps)): + op_desc = McSupportedOps[i] + if op_desc.type not in op_desc_raw_map: + op_desc_raw_map[op_desc.type] = [] + op_desc_raw_map[op_desc.type].append(op_desc) + + self.op_class_name_list = [] + self.op_src_path_list = [] + self.op_desc_map = {} + idx = 0 + for op_def in self.net_def.op: + new_op_desc = None + op_desc = self.find_op_in_desc_map(op_def, self.op_desc_map) + if op_desc is None: + new_op_desc = self.find_op_in_desc_map(op_def, op_desc_raw_map) + mace_check(new_op_desc is not None, + "not support op type %s, data type is %s, format is %s" % # noqa + (op_def.type, self.get_op_data_type(op_def), + self.get_op_data_format(op_def))) + if op_def.type not in self.op_desc_map: + self.op_desc_map[op_def.type] = [] + else: + new_op_desc = copy.deepcopy(op_desc) + new_op_desc.name = op_def.name + new_op_desc.idx = idx + idx += 1 + self.op_desc_map[op_def.type].append(new_op_desc) + return self.op_desc_map + + def get_op_desc_list_from_model(self): + op_desc_map = self.get_op_desc_map_from_model() + op_desc_list = [] + for op_descs in op_desc_map.values(): + op_desc_list.extend(op_descs) + op_desc_list.sort(key=lambda op_desc: op_desc.idx) + op_class_name_list = [op_desc.class_name for op_desc in op_desc_list] + op_desc_list.sort(key=lambda op_desc: op_desc.src_path) + op_src_path_list = [op_desc.src_path for op_desc in op_desc_list] + return (list(set(op_src_path_list)), op_class_name_list) diff --git a/tools/python/micro/proto_to_bytes.py b/tools/python/micro/proto_to_bytes.py new file mode 100644 index 00000000..cbb6f41e --- /dev/null +++ b/tools/python/micro/proto_to_bytes.py @@ -0,0 +1,219 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
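proto_to_bytes.py below flattens a protobuf message into a position-independent blob that the C++ runtime reads in place: scalar fields are packed little-endian where they occur, while strings, bytes and repeated fields are stored as a (length, offset) pair whose payload is appended later by a breadth-first queue, and the offset is then patched relative to the parent record (16-bit pairs when offset16 is set). A minimal sketch of that length-plus-offset convention, not of the class itself:

    # Sketch of the (length, offset) convention used by ProtoConverter below.
    import struct

    child = b"conv_1\x00\x00"                            # payload padded to a 4-byte multiple
    blob = bytearray(struct.pack("<II", len(child), 0))  # length plus offset placeholder
    child_offset = len(blob)                             # payload position, relative to the
    blob += child                                        # parent record starting at 0
    struct.pack_into("<I", blob, 4, child_offset)        # patch the offset in place
    print(struct.unpack_from("<II", blob, 0))            # (8, 8)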
+ + +from google.protobuf.descriptor import FieldDescriptor +from utils.util import mace_check +import sys +import struct +import tempfile + +if sys.version > '3': + import queue +else: + import Queue as queue + +SimpleTypeArray = [ + FieldDescriptor.TYPE_DOUBLE, + FieldDescriptor.TYPE_FLOAT, + FieldDescriptor.TYPE_INT64, + FieldDescriptor.TYPE_UINT64, + FieldDescriptor.TYPE_INT32, + FieldDescriptor.TYPE_BOOL, + FieldDescriptor.TYPE_UINT32, + FieldDescriptor.TYPE_ENUM, +] + +# This type is string but it should be stored specially +TYPE_STRING_EX = FieldDescriptor.MAX_TYPE + 1000 +TYPE_BYTES_EX = FieldDescriptor.MAX_TYPE + 1001 +TYPE_UINT16 = FieldDescriptor.MAX_TYPE + 1002 + + +class ObjInfo: + def __init__(self, obj, parent_addr, offset, type=None): + self.obj = obj + self.parent_addr = parent_addr + self.offset = offset + self.type = type + + +class ProtoConverter: + def __init__(self, offset16=False, write_magic=False, exclude_fileds={}): + self.offset16 = offset16 + self.write_magic = write_magic + self.exclude_fileds = exclude_fileds + + # return the length of string with '\0' + def str_raw_len(self, str): + length = len(str) + if length > 0: + length += 1 + return length + + # return the string length which can by devided by 4 + def str_pack_len(self, str): + return int((self.str_raw_len(str) + 3) / 4) * 4 + + def pack(self, value, pb_type): + if pb_type is FieldDescriptor.TYPE_INT32 or \ + pb_type is FieldDescriptor.TYPE_INT64: + return struct.pack(' 0: + value += b'\0' + empty_len -= 1 + return struct.pack('<' + str(pack_length) + 's', value) + else: + mace_check(False, + "The pack's pb_type is not supported: %s" % pb_type) + + def get_pack_type(self): + pack_type = FieldDescriptor.TYPE_UINT32 + if self.offset16: + pack_type = TYPE_UINT16 + return pack_type + + def bs_info_to_bytes(self, in_bytes, bs, + object_queue, parent_addr, type): + length = self.str_pack_len(bs) + in_bytes += self.pack(length, self.get_pack_type()) + offset = len(in_bytes) + in_bytes += self.pack(offset, self.get_pack_type()) + if length > 0: + object_queue.put(ObjInfo(bs, parent_addr, offset, type)) + return in_bytes + + def string_info_to_bytes(self, in_bytes, string, + object_queue, parent_addr): + return self.bs_info_to_bytes(in_bytes, string, object_queue, + parent_addr, FieldDescriptor.TYPE_STRING) + + def bytes_info_to_bytes(self, in_bytes, bytes, object_queue, parent_addr): + return self.bs_info_to_bytes(in_bytes, bytes, object_queue, + parent_addr, FieldDescriptor.TYPE_BYTES) + + def array_to_bytes(self, in_bytes, array, + object_queue, parent_addr, descriptor): + length = len(array) + in_bytes += self.pack(length, self.get_pack_type()) + offset = len(in_bytes) + in_bytes += self.pack(offset, self.get_pack_type()) + if length > 0: + array_length = len(array) + for i in range(array_length): + # other units needn't write offset to their parent + array_parent_addr = parent_addr + if i > 0: + array_parent_addr = -1 + des_type = descriptor.type + if des_type is FieldDescriptor.TYPE_STRING: + des_type = TYPE_STRING_EX + elif des_type is FieldDescriptor.TYPE_BYTES: + des_type = TYPE_BYTES_EX + object_queue.put( + ObjInfo(array[i], array_parent_addr, offset, des_type)) + return in_bytes + + def container_obj_to_bytes(self, obj_info, object_queue, parent_addr): + bytes = b'' + if self.write_magic: + bytes = struct.pack('<4s', obj_info.obj.DESCRIPTOR.name[0:4]) + + for descriptor in obj_info.obj.DESCRIPTOR.fields: + if obj_info.obj.DESCRIPTOR.name in self.exclude_fileds and \ + descriptor.name in 
self.exclude_fileds[ + obj_info.obj.DESCRIPTOR.name]: # noqa + continue + value = getattr(obj_info.obj, descriptor.name) + if descriptor.label == descriptor.LABEL_REPEATED: + array = value + bytes = self.array_to_bytes(bytes, array, object_queue, + parent_addr, descriptor) + elif descriptor.type in SimpleTypeArray: + bytes += self.pack(value, descriptor.type) + elif descriptor.type is descriptor.TYPE_STRING: + bytes = self.string_info_to_bytes(bytes, value, object_queue, + parent_addr) + elif descriptor.type is descriptor.TYPE_BYTES: + bytes = self.bytes_info_to_bytes(bytes, value, object_queue, + parent_addr) + else: + mace_check( + False, + "The pb type is not supported: %s" % descriptor.type) + return bytes + + def object_to_bytes(self, obj_info, object_queue, start_addr): + if hasattr(obj_info.obj, 'DESCRIPTOR'): + obj_bytes = self.container_obj_to_bytes(obj_info, object_queue, + start_addr) + elif obj_info.type is FieldDescriptor.TYPE_STRING: + obj_bytes = self.pack(bytes(obj_info.obj.encode('utf-8')), + obj_info.type) + elif obj_info.type is FieldDescriptor.TYPE_BYTES: + obj_bytes = self.pack(obj_info.obj, obj_info.type) + elif obj_info.type is TYPE_STRING_EX: + obj_bytes = self.string_info_to_bytes(b'', obj_info.obj, + object_queue, start_addr) + elif obj_info.type is TYPE_BYTES_EX: + obj_bytes = self.bytes_info_to_bytes(b'', obj_info.obj, + object_queue, start_addr) + else: # simple obj + obj_bytes = self.pack(obj_info.obj, obj_info.type) + return obj_bytes + + def write_obj_queue_to_file(self, object_queue, f): + while not object_queue.empty(): + obj_info = object_queue.get() + start_addr = f.tell() + bytes = self.object_to_bytes(obj_info, object_queue, start_addr) + f.write(bytes) + + # write the obj's offset in its parent + if obj_info.parent_addr >= 0: + end_addr = f.tell() + f.seek(obj_info.parent_addr + obj_info.offset, 0) + f.write(self.pack(start_addr - obj_info.parent_addr, + self.get_pack_type())) + f.seek(end_addr, 0) + + def proto_to_bytes(self, root_obj, ): + object_queue = queue.Queue() + object_queue.put(ObjInfo(root_obj, -1, -1)) + with tempfile.TemporaryFile() as f: + self.write_obj_queue_to_file(object_queue, f) + f.seek(0) + return f.read() + return None diff --git a/tools/python/micro/scratch_computer.py b/tools/python/micro/scratch_computer.py new file mode 100644 index 00000000..b88469bd --- /dev/null +++ b/tools/python/micro/scratch_computer.py @@ -0,0 +1,128 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
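scratch_computer.py below sizes the shared scratch buffer as the worst case over all ops: most ops need none, pooling needs one int32 plus one float per input channel, matmul and expand_dims need one int32 per output dimension, and depthwise conv needs block_size * 4 * filter_channels floats. A small sketch of the max-reduction (helper names are illustrative):

    # Sketch of the sizing rule behind ScratchComputer.compute_size below.
    def pooling_scratch_bytes(input_channels):
        return input_channels * (4 + 4)           # one int32 and one float per channel

    def total_scratch_bytes(per_op_sizes):
        return max([1] + list(per_op_sizes))      # shared buffer, so take the worst case

    print(total_scratch_bytes([0, pooling_scratch_bytes(32), 0]))  # 256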
+ +from py_proto import mace_pb2 +from utils.config_parser import ModelKeys +from utils.util import mace_check +from transform.base_converter import MaceKeyword +from transform.base_converter import MaceOp + + +class ScratchComputer: + def __init__(self, net_def, model_conf): + self.net_def = net_def + if ModelKeys.quantize in model_conf and \ + model_conf[ModelKeys.quantize] == 1: + self.default_data_type = mace_pb2.DT_UINT8 + else: + self.default_data_type = mace_pb2.DT_FLOAT + self._scratch_map = { + MaceOp.Conv2D: self.scratch_size_no_need, + MaceOp.Squeeze: self.scratch_size_of_squeeze, + MaceOp.Softmax: self.scratch_size_no_need, + MaceOp.Eltwise: self.scratch_size_no_need, + MaceOp.Activation: self.scratch_size_no_need, + MaceOp.StridedSlice: self.scratch_size_no_need, + MaceOp.Reduce: self.scratch_size_no_need, + MaceOp.Stack: self.scratch_size_no_need, + MaceOp.BiasAdd: self.scratch_size_no_need, + MaceOp.BatchNorm: self.scratch_size_no_need, + MaceOp.Shape: self.scratch_size_no_need, + MaceOp.Reshape: self.scratch_size_no_need, + MaceOp.ExpandDims: self.scratch_size_of_expand_dims, + MaceOp.MatMul: self.scratch_size_of_matmul, + MaceOp.Pooling: self.scratch_size_of_pooling, + MaceOp.DepthwiseConv2d: self.scratch_size_of_depthwise_conv, + MaceOp.ArgMax: self.scratch_size_no_need, + MaceOp.Cast: self.scratch_size_no_need, + } + + def compute_size(self): + scratch_size = 1 + for op_def in self.net_def.op: + mace_check(op_def.type in self._scratch_map, + "The %s's scratch func is lost." % op_def.type) + size = self._scratch_map[op_def.type](op_def) + if scratch_size < size: + scratch_size = size + print("micro scatch buffer size is: %s" % scratch_size) + return scratch_size + + def scratch_size_no_need(self, op_def): + return 0 + + def get_op_data_type(self, op_def): + arg = self.get_op_def_arg(op_def, MaceKeyword.mace_op_data_type_str) + if arg is None: + return self.default_data_type + else: + return arg.i + + def get_data_bytes(self, data_type): + if data_type == mace_pb2.DT_FLOAT or \ + data_type == mace_pb2.DT_INT32: + return 4 + elif data_type == mace_pb2.DT_HALF or \ + data_type == mace_pb2.DT_FLOAT16: + return 2 + elif data_type == mace_pb2.DT_UINT8: + return 1 + else: + mace_check(False, "Invalid data type: %s" % data_type) + + def scratch_size_of_expand_dims(self, op_def): + output_dim_size = len(op_def.output_shape[0].dims) + data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) + return output_dim_size * data_type_bytes + + def scratch_size_of_matmul(self, op_def): + output_dim_size = len(op_def.output_shape[0].dims) + data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) + return output_dim_size * data_type_bytes + + def get_op_input_dims(self, op_def, idx): + input_name = op_def.input[idx] + for const_tensor in self.net_def.tensors: + if input_name == const_tensor.name: + return const_tensor.dims + for pre_op in self.net_def.op: + for i in range(len(pre_op.output)): + if pre_op.output[i] == input_name: + return pre_op.output_shape[i].dims + return None + + def scratch_size_of_pooling(self, op_def): + input0_dims = self.get_op_input_dims(op_def, 0) + channels = input0_dims[3] + mace_check(channels > 0, + "can not inference pooling's input shape.") + + int_bytes = self.get_data_bytes(mace_pb2.DT_INT32) + float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) + + return channels * (int_bytes + float_bytes) + + def scratch_size_of_depthwise_conv(self, op_def): + filter_dims = self.get_op_input_dims(op_def, 1) + k_batch = filter_dims[0] + block_size = k_batch + if 
block_size > 4: + block_size = 4 + k_channels = filter_dims[3] + float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) + return block_size * 4 * k_channels * float_bytes + + def scratch_size_of_squeeze(self, op_def): + input0_dims = self.get_op_input_dims(op_def, 0) + return len(input0_dims) * self.get_data_bytes(mace_pb2.DT_FLOAT) diff --git a/tools/python/micro_converter.py b/tools/python/micro_converter.py new file mode 100644 index 00000000..a6a3dc28 --- /dev/null +++ b/tools/python/micro_converter.py @@ -0,0 +1,153 @@ +# Copyright 2020 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil +import numpy as np + +from micro.graph_builder import GraphBuilder +from micro.mem_computer import MemComputer +from micro.micro_codegen import MicroCodeGen +from micro.micro_io_converter import MicroIoConverter +from micro.micro_op_converter import MicroOpConverter +from micro.micro_support_ops import OpResolver +from micro.micro_support_ops import McSupportedOps +from micro.proto_to_bytes import ProtoConverter +from micro.scratch_computer import ScratchComputer +from py_proto import mace_pb2 +from utils import util +from utils.config_parser import ModelKeys +from utils.convert_util import data_type_to_np_dt +from utils.util import mace_check + +NetDefExcludeFields = { + 'OperatorDef': [ + 'quantize_info', + 'node_id', + 'op_id', + 'padding', + 'node_input', + 'out_max_byte_size', + ], +} + + +class MicroConverter: + def __init__(self, model_conf, net_def, model_weights, + model_name, offset16=False, write_magic=False): + self.model_conf = model_conf + data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) + self.net_def = MicroIoConverter.convert(net_def, data_type) + self.model_weights = model_weights + self.model_name = model_name + self.offset16 = offset16 + self.write_magic = write_magic + self.code_gen = MicroCodeGen() + data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) + self.np_data_type = data_type_to_np_dt(data_type, np.float32) + self.gen_folder = 'micro/codegen/' + util.mkdir_p(self.gen_folder) + self.op_resolver = OpResolver(self.net_def, self.model_conf) + + def gen_code_from_model(self, model_name, pb_model, model_weights): + net_def = pb_model + output_dir = self.gen_folder + 'models/' + model_name + '/' + shutil.rmtree(output_dir, ignore_errors=True) + util.mkdir_p(output_dir) + + # comput mem size and mem block offset and update the net_def, + # should count before ProtoConverter + mem_computer = MemComputer(net_def, self.np_data_type) + tensor_mem_size = mem_computer.compute() + + # gen the c++ NetDef struct + net_def_converter = ProtoConverter(self.offset16, self.write_magic, + NetDefExcludeFields) + net_def_bytes = net_def_converter.proto_to_bytes(net_def) + mace_check(net_def_bytes is not None, "proto_to_bytes failed.") + self.code_gen.gen_net_def_data(model_name, net_def_bytes, + output_dir + 'micro_net_def_data.h') + + # gen operator array + (op_src_path_list, op_class_name_list) = \ + 
self.op_resolver.get_op_desc_list_from_model() + self.code_gen.gen_ops_data( + model_name, op_src_path_list, op_class_name_list, + output_dir + 'micro_ops_list.h') + + # gen the c++ Graph struct + graph = GraphBuilder(net_def, self.op_resolver).build() + graph_converter = ProtoConverter(self.offset16, self.write_magic) + graph_bytes = graph_converter.proto_to_bytes(graph) + self.code_gen.gen_graph_data(model_name, graph_bytes, + output_dir + 'micro_graph_data.h') + + scratch_buffer_size = ScratchComputer( + net_def, self.model_conf).compute_size() + + # gen micro engine config + engine_data = {} + engine_data['tensor_mem_size'] = tensor_mem_size + engine_data['input_size'] = len(net_def.input_info) + engine_data['scratch_buffer_size'] = scratch_buffer_size + self.code_gen.gen_engin_config(model_name, engine_data, + output_dir + 'micro_engine_config.cc') + + # gen micro model tensor data + tensor_bytes = bytearray(model_weights) + self.code_gen.gen_model_data(model_name, tensor_bytes, + output_dir + 'micro_model_data.h') + + def gen_engine_interface_code(self, model_name): + output_dir = self.gen_folder + 'engines/' + model_name + '/' + shutil.rmtree(output_dir, ignore_errors=True) + util.mkdir_p(output_dir) + self.code_gen.gen_engine_factory( + model_name, + output_dir + 'micro_engine_factory.h', + output_dir + 'micro_engine_factory.cc') + self.code_gen.gen_engine_c_interface( + model_name, + output_dir + 'micro_engine_c_interface.h', + output_dir + 'micro_engine_c_interface.cc') + + def gen_code(self): + MicroOpConverter(self.net_def, self.model_weights, + self.np_data_type).convert_op_params() + self.gen_code_from_model( + self.model_name, self.net_def, self.model_weights) + self.gen_engine_interface_code(self.model_name) + + def package(self, tar_package_path): + (op_h_path_list, op_class_name_list) = \ + self.op_resolver.get_op_desc_list_from_model() + all_op_header_list = [op_desc.src_path for op_desc in McSupportedOps] + op_h_exclude_list = [] + for op_header in all_op_header_list: + if op_header not in op_h_path_list: + op_h_exclude_list.append(op_header) + op_cc_exclude_list = \ + [op_h.replace(".h", ".cc") for op_h in op_h_exclude_list] + exclude_list = ["--exclude=" + op_h for op_h in op_h_exclude_list] + exclude_list.extend( + ["--exclude=" + op_h for op_h in op_cc_exclude_list]) + tmp_dir = "/tmp/micro" + tmp_workspace_file = "WORKSPACE" + os.system("mkdir -p %s && touch %s/%s" % + (tmp_dir, tmp_dir, tmp_workspace_file)) + tar_command = "tar --exclude=micro/tools --exclude=micro/test " + tar_command += " ".join(exclude_list) + tar_command += " -zcf " + tar_package_path + tar_command += " micro -C %s %s" % (tmp_dir, tmp_workspace_file) + os.system(tar_command) diff --git a/tools/python/py_proto/__init__.py b/tools/python/py_proto/__init__.py index c94fa6f3..c2dfd046 100644 --- a/tools/python/py_proto/__init__.py +++ b/tools/python/py_proto/__init__.py @@ -32,6 +32,10 @@ else: device.execute("bazel build //mace/proto:mace_py") device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd) + device.execute("bazel build //mace/proto:micro_mem_py") + device.execute( + "cp -f bazel-genfiles/mace/proto/micro_mem_pb2.py %s" % cwd) + device.execute("bazel build //third_party/caffe:caffe_py") device.execute( "cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd) diff --git a/tools/python/run_micro.py b/tools/python/run_micro.py new file mode 100644 index 00000000..9008ec02 --- /dev/null +++ b/tools/python/run_micro.py @@ -0,0 +1,292 @@ +# Copyright 2020 The MACE Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import copy +import numpy as np +import shutil +import tempfile + +from micro_converter import MicroConverter +from py_proto import mace_pb2 +import run_target +from utils import util +from utils import device +from utils import config_parser +from utils.target import Target +from utils.config_parser import ModelKeys +from utils.util import MaceLogger +from utils.util import mace_check +import validate +import layers_validate + + +def join_2d_array(xs): + return ":".join([",".join([str(y) for y in x]) for x in xs]) + + +def build_engine(model_name, data_type): + mace_check(flags.model_name is not None and len(model_name) > 0, + "you should specify model name for build.") + command = "bazel build //micro/tools:micro_run_static" \ + " --config optimization " \ + " --copt \"-DMICRO_MODEL_NAME=%s\"" % model_name + if data_type == mace_pb2.DT_BFLOAT16: + command += " --copt \"-DMACE_ENABLE_BFLOAT16\"" + print("The current engine's data type is bfloat16.") + device.execute(command) + + +def get_model_conf_by_name(flags, conf): + for name, model_conf in conf["models"].items(): + if not flags.model_name or name == flags.model_name: + return model_conf + return None + + +def run_model(flags, args, conf): + model_conf = get_model_conf_by_name(flags, conf) + mace_check(model_conf is not None, "Get model conf failed.") + model_conf = config_parser.normalize_model_config(model_conf) + run_model_with_conf(flags, args, flags.model_name, model_conf) + + +def gen_sub_model_conf(output_config, flags, conf): + model_conf = copy.deepcopy(get_model_conf_by_name(flags, conf)) + model_conf['subgraphs'][0]['output_tensors'] = \ + output_config['output_tensors'] + model_conf['subgraphs'][0]['output_shapes'] = \ + output_config['output_shapes'] + return model_conf + + +def run_layers_validate(flags, args, original_conf): + model_name = flags.model_name + original_model_dir = flags.output + "/" + \ + original_conf['library_name'] + "/model" + model_dir = "/tmp/micro_run/model" + device.execute("mkdir -p %s" % model_dir) + device.execute("cp -p %s/%s.pb %s" % + (original_model_dir, model_name, model_dir)) + params_file_path = "%s/%s.data" % (original_model_dir, model_name) + output_configs = layers_validate.get_layers( + model_dir, model_name, flags.layers) + + for i in range(len(output_configs)): + sub_model_conf = gen_sub_model_conf( + output_configs[i], flags, original_conf) + with open(output_configs[i]['model_file_path'], "rb") as model_file: + net_def = mace_pb2.NetDef() + net_def.ParseFromString(model_file.read()) + with open(params_file_path, "rb") as params_file: + weights = bytearray(params_file.read()) + micro_conf = \ + config_parser.normalize_model_config(sub_model_conf) + MicroConverter(micro_conf, net_def, + weights, model_name).gen_code() + build_engine(model_name, micro_conf[ModelKeys.data_type]) + 
+
+
+def run_model_with_conf(flags, args, model_name, model_conf):
+    target_abi = "host"
+    dev = device.HostDevice("host", target_abi)
+    install_dir = "/tmp/micro_run/" + model_name
+
+    if ModelKeys.check_tensors in model_conf:
+        model_conf[ModelKeys.output_tensors] = model_conf[
+            ModelKeys.check_tensors]
+        model_conf[ModelKeys.output_shapes] = model_conf[
+            ModelKeys.check_shapes]
+
+    model_args = {"model_name": model_name,
+                  "input_node": ",".join(
+                      model_conf[ModelKeys.input_tensors]),
+                  "input_shape": join_2d_array(
+                      model_conf[ModelKeys.input_shapes]),
+                  "output_node": ",".join(
+                      model_conf[ModelKeys.output_tensors]),
+                  "output_shape": join_2d_array(
+                      model_conf[ModelKeys.output_shapes]),
+                  "input_data_format": ",".join(
+                      [df.name for df in
+                       model_conf[ModelKeys.input_data_formats]]),
+                  "output_data_format": ",".join(
+                      [df.name for df in
+                       model_conf[ModelKeys.output_data_formats]])
+                  }
+
+    opts = ["--%s=%s" % (arg_key, arg_val) for arg_key, arg_val in
+            model_args.items()] + args
+
+    # generate data start
+    tmp_dir_name = tempfile.mkdtemp()
+    input_file_prefix = tmp_dir_name + "/" + model_name
+    if ModelKeys.validation_inputs_data in model_conf:
+        input_tensor = model_conf[ModelKeys.input_tensors]
+        input_data = model_conf[ModelKeys.validation_inputs_data]
+        mace_check(len(input_tensor) == len(input_data),
+                   "len(input_tensor) != len(validate_data)")
+
+        for i in range(len(input_tensor)):
+            util.download_or_get_file(
+                model_conf[ModelKeys.validation_inputs_data][i], "",
+                util.formatted_file_name(input_file_prefix,
+                                         input_tensor[i]))
+    else:
+        generate_input_data(input_file_prefix,
+                            model_conf[ModelKeys.input_tensors],
+                            model_conf[ModelKeys.input_shapes],
+                            model_conf[ModelKeys.input_ranges],
+                            model_conf[ModelKeys.input_data_types])
+
+    dev.install(Target(tmp_dir_name), install_dir + "/validate_in")
+    target_input_file = "%s/validate_in/%s" % (
+        install_dir, model_name)
+    target_output_dir = "%s/validate_out" % install_dir
+    dev.mkdir(target_output_dir)
+    target_output_file = target_output_dir + "/" + model_name
+    opts += ["--input_file=%s" % target_input_file,
+             "--output_file=%s" % target_output_file]
+    # generate data end
+
+    envs = []
+    if flags.vlog_level > 0:
+        envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level]
+
+    target = Target("bazel-bin/micro/tools/micro_run_static", [],
+                    opts=opts, envs=envs)
+    run_target.run_target(target_abi, install_dir, target,
+                          device_ids="host")
+
+    if flags.validate:
+        validate_model_file = util.download_or_get_model(
+            model_conf[ModelKeys.model_file_path],
+            model_conf[ModelKeys.model_sha256_checksum],
+            tmp_dir_name)
+
+        validate_weight_file = ""
+        if ModelKeys.weight_file_path in model_conf:
+            validate_weight_file = util.download_or_get_model(
+                model_conf[ModelKeys.weight_file_path],
+                model_conf[ModelKeys.weight_sha256_checksum],
+                tmp_dir_name)
+
+        dev.pull(Target(target_output_dir), tmp_dir_name + "/validate_out")
+        output_file_prefix = tmp_dir_name + "/validate_out/" + model_name
+        validate.validate(model_conf[ModelKeys.platform],
+                          validate_model_file,
+                          validate_weight_file,
+                          input_file_prefix,
+                          output_file_prefix,
+                          model_conf[ModelKeys.input_shapes],
+                          model_conf[ModelKeys.output_shapes],
+                          model_conf[ModelKeys.input_data_formats],
+                          model_conf[ModelKeys.output_data_formats],
+                          model_conf[ModelKeys.input_tensors],
+                          model_conf[ModelKeys.output_tensors],
+                          flags.validate_threshold,
+                          model_conf[ModelKeys.input_data_types],
+                          flags.backend,
+                          "",
+                          "")
+    shutil.rmtree(tmp_dir_name)
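run_model_with_conf flattens the model config into --key=value options for micro_run_static, using join_2d_array to encode lists of shapes in a colon/comma form. A toy, self-contained illustration of that encoding (the tensor names and shapes are hypothetical, and the real option set is larger):

def join_2d_array(xs):
    return ":".join([",".join([str(y) for y in x]) for x in xs])

model_args = {
    "model_name": "mobilenet_v1",                  # hypothetical model
    "input_node": ",".join(["input"]),
    "input_shape": join_2d_array([[1, 224, 224, 3]]),
    "output_node": ",".join(["output"]),
    "output_shape": join_2d_array([[1, 1001]]),
}
opts = ["--%s=%s" % (k, v) for k, v in model_args.items()]
print(opts)
# e.g. ['--model_name=mobilenet_v1', '--input_node=input',
#       '--input_shape=1,224,224,3', '--output_node=output',
#       '--output_shape=1,1001']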
+
+
+def generate_input_data(input_file, input_node, input_shape, input_ranges,
+                        input_data_type):
+    np.random.seed()
+    for i in range(len(input_node)):
+        data = np.random.random(input_shape[i]) * (
+            input_ranges[i][1] - input_ranges[i][0]) + input_ranges[i][0]
+        input_file_name = util.formatted_file_name(input_file, input_node[i])
+        MaceLogger.info('Generate input file: %s' % input_file_name)
+        if input_data_type[i] == mace_pb2.DT_FLOAT:
+            np_data_type = np.float32
+        elif input_data_type[i] == mace_pb2.DT_INT32:
+            np_data_type = np.int32
+        else:
+            # fall back to float32 for other input data types
+            np_data_type = np.float32
+
+        data.astype(np_data_type).tofile(input_file_name)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="",
+        help="yaml conf path"
+    )
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="",
+        help="model name in yaml conf"
+    )
+    parser.add_argument(
+        "--validate",
+        action="store_true",
+        help="enable validate"
+    )
+    parser.add_argument(
+        "--validate_threshold",
+        type=float,
+        default="0.99",
+        help="validate threshold"
+    )
+    parser.add_argument(
+        "--layers",
+        type=str,
+        default="-1",
+        help="'start_layer:end_layer' or 'layer', similar to python slice."
+             " Use with --validate flag.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default="tensorflow",
+        help="onnx backend framework")
+    parser.add_argument(
+        "--build",
+        action="store_true",
+        help="if build before run"
+    )
+    parser.add_argument(
+        '--output',
+        type=str,
+        default="build",
+        help="output dir")
+    parser.add_argument(
+        '--vlog_level',
+        type=int,
+        default="0",
+        help="vlog level")
+
+    return parser.parse_known_args()
+
+
+if __name__ == "__main__":
+    flags, args = parse_args()
+    conf = config_parser.parse(flags.config)
+    if flags.build or flags.validate:
+        micro_conf = config_parser.normalize_model_config(
+            conf[ModelKeys.models][flags.model_name])
+        build_engine(flags.model_name, micro_conf[ModelKeys.data_type])
+    if flags.validate and flags.layers != "-1":
+        run_layers_validate(flags, args, conf)
+    else:
+        run_model(flags, args, conf)
diff --git a/tools/python/template/file_binary.cc.jinja2 b/tools/python/template/file_binary.cc.jinja2
index d3fa6e19..a92dd2f9 100644
--- a/tools/python/template/file_binary.cc.jinja2
+++ b/tools/python/template/file_binary.cc.jinja2
@@ -20,7 +20,7 @@ namespace mace {
 
 const unsigned char *{{ load_func_name }}() {
 {% if data_size == 0 %}
-  return nullptr;
+  return NULL;
 {% else %}
   static const unsigned char kData[{{ data_size }}] = {
     {% for d in data %}{{"0x%02X, " % d }}{%endfor%}
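parse_args relies on parse_known_args, so flags that run_micro.py itself does not define are returned separately in args and later appended to the runner's option list. A minimal sketch of that split (--round is a made-up extra flag used only for illustration, not necessarily one the runner accepts):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="")
parser.add_argument("--validate", action="store_true")

flags, extra = parser.parse_known_args(
    ["--model_name", "mobilenet_v1", "--validate", "--round=10"])
print(flags.model_name, flags.validate)  # mobilenet_v1 True
print(extra)  # ['--round=10'] -> passed through to the runner binary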
diff --git a/tools/python/utils/config_parser.py b/tools/python/utils/config_parser.py
index 5713303e..6f0237ff 100644
--- a/tools/python/utils/config_parser.py
+++ b/tools/python/utils/config_parser.py
@@ -73,6 +73,7 @@ def parse_device_info(path):
 class ModelKeys(object):
     platform = "platform"
     runtime = "runtime"
+    models = 'models'
     graph_optimize_options = "graph_optimize_options"
     input_tensors = "input_tensors"
     input_shapes = "input_shapes"
@@ -175,6 +176,8 @@ def parse_data_type(str):
 def parse_internal_data_type(str):
     if str == 'fp32_fp32':
         return mace_pb2.DT_FLOAT
+    elif str == 'bf16_fp32':
+        return mace_pb2.DT_BFLOAT16
     else:
         return mace_pb2.DT_HALF
 
@@ -187,6 +190,8 @@ def to_list(x):
 
 
 def parse_int_array(xs):
+    if len(xs) == 0:
+        return [1]
     return [int(x) for x in xs.split(",")]
 
 
@@ -201,7 +206,6 @@ def normalize_model_config(conf):
         del conf[ModelKeys.subgraphs]
         conf.update(subgraph)
 
-    print(conf)
     conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform])
     conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime])
 
diff --git a/tools/python/utils/convert_util.py b/tools/python/utils/convert_util.py
new file mode 100644
index 00000000..7d37a32b
--- /dev/null
+++ b/tools/python/utils/convert_util.py
@@ -0,0 +1,105 @@
+
+# Copyright 2020 The MACE Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# python tools/python/convert.py \
+#     --config ../mace-models/mobilenet-v2/mobilenet-v2.yml
+
+import array
+import numpy as np
+import struct
+from py_proto import mace_pb2
+
+
+def Float2BFloat16Bytes(float_data):
+    int_datas = []
+    for value in float_data:
+        bytes = struct.pack("f", value)
+        int_data = struct.unpack('i', bytes)[0]
+        int_datas.append(int_data >> 16)
+    return np.array(int_datas).astype(np.uint16).tobytes()
+
+
+def merge_params(net_def, data_type):
+    def tensor_to_bytes(tensor):
+        if tensor.data_type == mace_pb2.DT_HALF:
+            data = bytearray(
+                np.array(tensor.float_data).astype(np.float16).tobytes())
+            tensor.data_size = len(tensor.float_data)
+        elif tensor.data_type == mace_pb2.DT_FLOAT:
+            data = bytearray(
+                np.array(tensor.float_data).astype(np.float32).tobytes())
+            tensor.data_size = len(tensor.float_data)
+        elif tensor.data_type == mace_pb2.DT_INT32:
+            data = bytearray(
+                np.array(tensor.int32_data).astype(np.int32).tobytes())
+            tensor.data_size = len(tensor.int32_data)
+        elif tensor.data_type == mace_pb2.DT_UINT8:
+            data = bytearray(
+                np.array(tensor.int32_data).astype(np.uint8).tolist())
+            tensor.data_size = len(tensor.int32_data)
+        elif tensor.data_type == mace_pb2.DT_FLOAT16:
+            data = bytearray(
+                np.array(tensor.float_data).astype(np.float16).tobytes())
+            tensor.data_size = len(tensor.float_data)
+        elif tensor.data_type == mace_pb2.DT_BFLOAT16:
+            data = Float2BFloat16Bytes(tensor.float_data)
+            tensor.data_size = len(tensor.float_data)
+        else:
+            raise Exception('Tensor data type %s not supported' %
+                            tensor.data_type)
+        return data
+
+    model_data = []
+    offset = 0
+    for tensor in net_def.tensors:
+        if tensor.data_type == mace_pb2.DT_FLOAT:
+            tensor.data_type = data_type
+        raw_data = tensor_to_bytes(tensor)
+        if tensor.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0:
+            padding = 4 - offset % 4
+            model_data.extend(bytearray([0] * padding))
+            offset += padding
+
+        tensor.offset = offset
+        model_data.extend(raw_data)
+        offset += len(raw_data)
+
+    for tensor in net_def.tensors:
+        if tensor.data_type == mace_pb2.DT_FLOAT \
+                or tensor.data_type == mace_pb2.DT_HALF \
+                or tensor.data_type == mace_pb2.DT_FLOAT16 \
+                or tensor.data_type == mace_pb2.DT_BFLOAT16:
+            del tensor.float_data[:]
+        elif tensor.data_type == mace_pb2.DT_INT32:
+            del tensor.int32_data[:]
+        elif tensor.data_type == mace_pb2.DT_UINT8:
+            del tensor.int32_data[:]
+
+    return net_def, model_data
+
+
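Float2BFloat16Bytes truncates each float32 to its upper 16 bits, which is exactly the bfloat16 encoding. If vectorization ever matters, an equivalent NumPy formulation could look like the sketch below (float_to_bfloat16_bytes is an illustrative name, not part of this patch; it performs the same truncation, without rounding):

import struct
import numpy as np

def float_to_bfloat16_bytes(values):
    # Reinterpret the float32 bits as uint32 and keep only the top 16 bits.
    f32 = np.asarray(values, dtype=np.float32)
    return (f32.view(np.uint32) >> 16).astype(np.uint16).tobytes()

# Quick check against the per-element struct-based version above.
vals = [1.0, -2.5, 3.14159]
ref = b"".join(
    struct.pack("H", struct.unpack("I", struct.pack("f", v))[0] >> 16)
    for v in vals)
assert float_to_bfloat16_bytes(vals) == ref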
+def data_type_to_np_dt(data_type, default_np_dt):
+    if data_type is None:
+        return default_np_dt
+    elif data_type == mace_pb2.DT_HALF or data_type == mace_pb2.DT_FLOAT16:
+        return np.float16
+    elif data_type == mace_pb2.DT_INT32:
+        return np.int32
+    elif data_type == mace_pb2.DT_UINT8:
+        return np.uint8
+    elif data_type == mace_pb2.DT_BFLOAT16:
+        return np.uint16
+    else:
+        return np.float32
diff --git a/tools/python/utils/device.py b/tools/python/utils/device.py
index 908a0dba..0b174088 100644
--- a/tools/python/utils/device.py
+++ b/tools/python/utils/device.py
@@ -55,6 +55,8 @@ def execute(cmd, verbose=True):
         buf.append(line)
 
     if p.returncode != 0:
+        if verbose:
+            print(line)
         raise Exception("errorcode: %s" % p.returncode)
 
     return "\n".join(buf)
@@ -95,11 +97,11 @@ class HostDevice(Device):
         if install_dir.strip() and install_dir != os.path.dirname(target.path):
             execute("mkdir -p %s" % install_dir)
             if os.path.isdir(target.path):
-                execute("cp %s/* %s" % (target.path, install_dir))
+                execute("cp -f %s/* %s" % (target.path, install_dir))
             else:
-                execute("cp %s %s" % (target.path, install_dir))
+                execute("cp -f %s %s" % (target.path, install_dir))
             for lib in target.libs:
-                execute("cp %s %s" % (lib, install_dir))
+                execute("cp -f %s %s" % (lib, install_dir))
 
             target.path = "%s/%s" % (install_dir,
                                      os.path.basename(target.path))
@@ -117,7 +119,7 @@ class HostDevice(Device):
         out_dir = os.path.abspath(out_dir)
 
         if out_dir.strip() and out_dir != os.path.dirname(target.path):
-            execute("cp -r %s %s" % (target.path, out_dir))
+            execute("cp -rp %s %s" % (target.path, out_dir))
 
     def mkdir(self, dirname):
         execute("mkdir -p %s" % dirname)
diff --git a/tools/python/validate.py b/tools/python/validate.py
index 7004d2a8..d4c4887c 100644
--- a/tools/python/validate.py
+++ b/tools/python/validate.py
@@ -96,7 +96,7 @@ def compare_output(output_name, mace_out_value,
                 util.StringFormatter.block("Similarity Test Passed"))
         else:
             util.MaceLogger.error(
-                "", util.StringFormatter.block("Similarity Test Failed"))
+                util.StringFormatter.block("Similarity Test Failed"))
     else:
         util.MaceLogger.error(
             "", util.StringFormatter.block(
@@ -110,6 +110,16 @@ def normalize_tf_tensor_name(name):
     return name
 
 
+def get_data_type_by_value(value):
+    data_type = value.dtype
+    if data_type == np.float32:
+        return mace_pb2.DT_FLOAT
+    elif data_type == np.int32:
+        return mace_pb2.DT_INT32
+    else:
+        return mace_pb2.DT_FLOAT
+
+
 def validate_with_file(output_names, output_shapes, mace_out_file,
                        validation_outputs_data, validation_threshold,
                        log_file):
@@ -182,7 +192,9 @@ def validate_tf_model(model_file,
             for i in range(len(output_names)):
                 output_file_name = util.formatted_file_name(
                     mace_out_file, output_names[i])
-                mace_out_value = load_data(output_file_name)
+                mace_out_value = load_data(
+                    output_file_name,
+                    get_data_type_by_value(output_values[i]))
                 if output_data_formats[i] == DataFormat.NCHW and \
                         len(output_shapes[i]) == 4:
                     mace_out_value = mace_out_value. \
-- 
GitLab
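The validate.py change passes an explicit data type into load_data because files written with ndarray.tofile() carry no dtype or shape metadata, so the reader must supply both. The hedged sketch below mirrors that idea with a hypothetical load_raw_output helper (not a function from this patch):

import os
import tempfile
import numpy as np

def load_raw_output(path, shape, np_dtype=np.float32):
    # Raw binary files have no header; the caller chooses dtype and shape.
    return np.fromfile(path, dtype=np_dtype).reshape(shape)

path = os.path.join(tempfile.mkdtemp(), "toy_output.bin")
tensor = np.arange(6, dtype=np.int32).reshape(2, 3)
tensor.tofile(path)
assert (load_raw_output(path, (2, 3), np.int32) == tensor).all()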