diff --git a/mace/core/future.h b/mace/core/future.h
index 4dc778ad5da83613dd033bd18159086c7bed39be..b5bf30dceb25616bad892b0681767fe056c6045c 100644
--- a/mace/core/future.h
+++ b/mace/core/future.h
@@ -30,6 +30,17 @@ struct StatsFuture {
   };
 };
 
+inline void SetFutureDefaultWaitFn(StatsFuture *future) {
+  if (future != nullptr) {
+    future->wait_fn = [](CallStats *stats) {
+      if (stats != nullptr) {
+        stats->start_micros = NowMicros();
+        stats->end_micros = stats->start_micros;
+      }
+    };
+  }
+}
+
 }  // namespace mace
 
 #endif  // MACE_CORE_FUTURE_H_
diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h
index 824186185ba63f41222942063380c462985c0413..bfcfa9ce528a5e5c03a6f6188aafd2cd130d4a89 100644
--- a/mace/kernels/eltwise.h
+++ b/mace/kernels/eltwise.h
@@ -652,11 +652,14 @@ struct EltwiseFunctor : EltwiseFunctorBase {
         static_cast(input0->dim_size() - input1->dim_size());
     if (data_format_ == NCHW) {
       MACE_CHECK(
-          input0->dim_size() == 4 &&
-              (input1->dim_size() == 0 ||
-               input1->dim_size() == 4 && input1->dim(1) == input0->dim(1) &&
-                   (input1->dim(0) == input0->dim(0) || input1->dim(0) == 1) ||
-               input1->dim_size() == 1 && input1->dim(0) == input0->dim(1)),
+          (input0->dim_size() == 4)
+              && ((input1->dim_size() == 0)
+                  || (input1->dim_size() == 4
+                      && input1->dim(1) == input0->dim(1)
+                      && (input1->dim(0) == input0->dim(0)
+                          || input1->dim(0) == 1))
+                  || (input1->dim_size() == 1
+                      && input1->dim(0) == input0->dim(1))),
           "only support broadcast channel dimension");
     } else {
       if (rank_diff > 0 && rank_diff < input0->dim_size()) {
diff --git a/mace/kernels/stack.h b/mace/kernels/stack.h
index 873b84ad5252adfbe14c734a5521972e92fd047b..3a630d8f28caa18e7950a22f8835a78305ffc79e 100644
--- a/mace/kernels/stack.h
+++ b/mace/kernels/stack.h
@@ -34,8 +34,6 @@ struct StackFunctor {
   MaceStatus operator()(const std::vector<const Tensor *> &inputs,
                         Tensor *output,
                         StatsFuture *future) {
-    MACE_UNUSED(future);
-
     MACE_CHECK(!inputs.empty(), "stack inputs are empty.");
     std::vector<index_t> input_shape = inputs[0]->shape();
     MACE_CHECK(axis_ >= -(inputs[0]->dim_size() + 1) &&
@@ -51,7 +49,7 @@ struct StackFunctor {
     // On host, no need to map data
     T *output_data = output->mutable_data<T>();
     std::vector<const T *> input_data(inputs.size());
-    for (int i = 0; i < inputs.size(); ++i) {
+    for (size_t i = 0; i < inputs.size(); ++i) {
       input_data[i] = inputs[i]->data<T>();
     }
 
@@ -62,13 +60,14 @@ struct StackFunctor {
         std::accumulate(input_shape.begin() + axis_, input_shape.end(), 1,
                         std::multiplies<index_t>());
     for (index_t h = 0; h < high_dim_elem_size; ++h) {
-      for (index_t i = 0; i < inputs.size(); ++i) {
+      for (size_t i = 0; i < inputs.size(); ++i) {
         memcpy(output_data, input_data[i] + h * low_dim_elem_size,
                sizeof(T) * low_dim_elem_size);
         output_data += low_dim_elem_size;
       }
     }
 
+    SetFutureDefaultWaitFn(future);
     return MACE_SUCCESS;
   }
 
diff --git a/mace/kernels/strided_slice.h b/mace/kernels/strided_slice.h
index efdd9141663eea93bd0ac554118858541631b8fa..6974b8f8d8ddecae2b81aec766ac40321d6299a4 100644
--- a/mace/kernels/strided_slice.h
+++ b/mace/kernels/strided_slice.h
@@ -49,7 +49,6 @@ struct StridedSliceFunctor {
                         const Tensor *strides,
                         Tensor *output,
                         StatsFuture *future) {
-    MACE_UNUSED(future);
     MACE_CHECK(ellipsis_mask_ == 0 && new_axis_mask_ == 0,
                "ellipsis_mask and new_axis_mask are not supported yet.");
 
@@ -169,6 +168,7 @@ struct StridedSliceFunctor {
       }
     }
 
+    SetFutureDefaultWaitFn(future);
     return MACE_SUCCESS;
   }
 
diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h
index d3f544d6f1a580f142b37300cabafc088558b839..49ecb8492e8e1ec896863f14785a43aea61ebc3f 100644
--- a/mace/kernels/winograd_transform.h
+++ b/mace/kernels/winograd_transform.h
@@ -95,9 +95,9 @@ struct WinogradInverseTransformFunctorBase {
       : batch_(batch),
         height_(height),
         width_(width),
+        wino_blk_size_(block_size),
         activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        wino_blk_size_(block_size) {}
+        relux_max_limit_(relux_max_limit) {}
 
   const int batch_;
   const int height_;
diff --git a/mace/ops/BUILD b/mace/ops/BUILD
index 09b5fa3b7f71e4b92237f602e6b01beb35d833f5..2bfc0b3d9f34f162edf076be48785e0897050d6c 100644
--- a/mace/ops/BUILD
+++ b/mace/ops/BUILD
@@ -41,7 +41,10 @@ cc_library(
         ["*.h"],
         exclude = ["ops_test_util.h"],
     ),
-    copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
         "-mfpu=neon",
@@ -64,7 +67,10 @@ cc_test(
     srcs = glob(
        ["*_test.cc"],
     ),
-    copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
         "-mfpu=neon",
@@ -88,7 +94,10 @@ cc_test(
     name = "ops_benchmark",
     testonly = 1,
     srcs = glob(["*_benchmark.cc"]),
-    copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
         "-mfpu=neon",
diff --git a/mace/ops/identity.h b/mace/ops/identity.h
index d2aa7446e52a50b4134c4f7455c6a93bfe71a8dd..7140314cc25fe5ce809f577de9a4e4ed9bd8ec1c 100644
--- a/mace/ops/identity.h
+++ b/mace/ops/identity.h
@@ -32,6 +32,7 @@ class IdentityOp : public Operator<D, T> {
     const Tensor *input = this->Input(INPUT);
     Tensor *output = this->Output(OUTPUT);
     output->ReuseTensorBuffer(*input);
+    SetFutureDefaultWaitFn(future);
     return MACE_SUCCESS;
   }
 
diff --git a/mace/ops/shape.h b/mace/ops/shape.h
index aaac1b39447a6ee53bcd326733d3b9b61526d16a..18d73e614e24f560e247330b65fac38ce25cb762 100644
--- a/mace/ops/shape.h
+++ b/mace/ops/shape.h
@@ -42,6 +42,7 @@ class ShapeOp : public Operator<D, T> {
     for (index_t i = 0; i < input->dim_size(); ++i) {
       output_data[i] = input->dim(i);
     }
+    SetFutureDefaultWaitFn(future);
     return MACE_SUCCESS;
   }
 
diff --git a/mace/ops/squeeze.h b/mace/ops/squeeze.h
index b736955f24d76936b4c15451860353337487a444..35b2aed4c2585f5bc85c427962270d9e35baf973 100644
--- a/mace/ops/squeeze.h
+++ b/mace/ops/squeeze.h
@@ -31,8 +31,6 @@ class SqueezeOp : public Operator<D, T> {
         axis_(OperatorBase::GetRepeatedArgs<int>("axis", {})) {}
 
   MaceStatus Run(StatsFuture *future) override {
-    MACE_UNUSED(future);
-
     const Tensor *input = this->Input(INPUT);
     Tensor *output = this->Output(OUTPUT);
 
@@ -47,6 +45,7 @@ class SqueezeOp : public Operator<D, T> {
     output->ReuseTensorBuffer(*input);
     output->Reshape(output_shape);
 
+    SetFutureDefaultWaitFn(future);
     return MACE_SUCCESS;
   }
 
diff --git a/mace/ops/stack.h b/mace/ops/stack.h
index 27a90fc32ae6870dcac2c0c52fc42d17cb769f93..17210fb29259cfbdf52b91840424863c0c3c62c4 100644
--- a/mace/ops/stack.h
+++ b/mace/ops/stack.h
@@ -33,7 +33,6 @@ class StackOp : public Operator<D, T> {
   MaceStatus Run(StatsFuture *future) override {
     const std::vector<const Tensor *> &inputs = this->Inputs();
     Tensor *output = this->Output(OUTPUT);
-
     return functor_(inputs, output, future);
   }
 
diff --git a/mace/ops/stack_test.cc b/mace/ops/stack_test.cc
index d91c4a022f38130b259d64629f8d2eac2a6a2d9f..8cccb133026026e3b24799fb95684956277849d9 100644
--- a/mace/ops/stack_test.cc
+++ b/mace/ops/stack_test.cc
@@ -29,7 +29,7 @@ void TestStack(const std::vector<index_t> &input_shape,
                const std::vector<index_t> &output_shape,
                const std::vector<float> &output) {
   OpsTestNet net;
-  for (int i = 0; i < inputs.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
     net.AddInputFromArray<DeviceType::CPU, float>(MakeString("Input", i),
                                                   input_shape, inputs[i]);
   }
@@ -38,7 +38,7 @@ void TestStack(const std::vector<index_t> &input_shape,
                     .Output("Output")
                     .AddIntArg("axis", axis);
 
-  for (int i = 0; i < inputs.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
     op_builder.Input(MakeString("Input", i));
   }
   op_builder.Finalize(net.NewOperatorDef());
diff --git a/mace/tools/validation/BUILD b/mace/tools/validation/BUILD
index 7004ade7501021c2fa4854ac0637d98a581cb770..af4483899b2920409a6c3f13dd4c2efe5609e0e8 100644
--- a/mace/tools/validation/BUILD
+++ b/mace/tools/validation/BUILD
@@ -4,7 +4,10 @@ load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
 cc_binary(
     name = "mace_run_static",
     srcs = ["mace_run.cc"],
-    copts = if_android([
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_android([
         "-DMACE_ENABLE_OPENCL",
     ]),
     linkopts = if_openmp_enabled(["-fopenmp"]),
@@ -19,7 +22,10 @@
 cc_binary(
     name = "mace_run_shared",
     srcs = ["mace_run.cc"],
-    copts = if_android([
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_android([
         "-DMACE_ENABLE_OPENCL",
     ]),
     linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]),
diff --git a/tools/converter.py b/tools/converter.py
index 153b9b197829c4260752030ba95954e26107c184..9b460a1f90d3460f2414318176e57514f853eb1a 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -1045,7 +1045,7 @@ def run_specific_target(flags, configs, target_abi,
 
 
 def run_mace(flags):
-    configs = format_model_config(flags.config)
+    configs = format_model_config(flags)
 
     target_socs = configs[YAMLKeyword.target_socs]
     if not target_socs or ALL_SOC_TAG in target_socs:
@@ -1159,7 +1159,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
 
 
 def benchmark_model(flags):
-    configs = format_model_config(flags.config)
+    configs = format_model_config(flags)
 
     target_socs = configs[YAMLKeyword.target_socs]
    if not target_socs or ALL_SOC_TAG in target_socs:
@@ -1211,6 +1211,16 @@ def parse_args():
         default="",
         required=True,
         help="model yaml configuration file path")
+    all_type_parent_parser.add_argument(
+        "--build_type",
+        type=str,
+        default="",
+        help="Model build type, can be ['proto', 'code'].")
+    all_type_parent_parser.add_argument(
+        "--target_abis",
+        type=str,
+        default="",
+        help="Target ABIs, comma seperated list.")
     build_run_parent_parser = argparse.ArgumentParser(add_help=False)
     build_run_parent_parser.add_argument(
         '--address_sanitizer',
@@ -1253,16 +1263,6 @@ def parse_args():
         "--enable_openmp",
         action="store_false",
         help="Enable openmp for multiple thread.")
-    build.add_argument(
-        "--build_type",
-        type=str,
-        default="",
-        help="Model build type, can be ['proto', 'code'].")
-    build.add_argument(
-        "--target_abis",
-        type=str,
-        default="",
-        help="Target ABIs, comma seperated list.")
     run = subparsers.add_parser(
         'run',
         parents=[all_type_parent_parser, run_bm_parent_parser,
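
Note on usage (not part of the patch): the standalone sketch below shows how the SetFutureDefaultWaitFn helper introduced in mace/core/future.h is meant to be consumed by a synchronous CPU kernel. The kernel finishes its work inline and then installs a wait_fn that reports a zero-length interval, so profiling code can always invoke the future safely instead of hitting an empty std::function. CallStats, StatsFuture, and NowMicros here are simplified stand-ins for the MACE types, and DummyKernel is a hypothetical kernel used only for illustration.

// Standalone sketch; simplified stand-ins for MACE types, not the real headers.
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>

struct CallStats {
  int64_t start_micros = 0;
  int64_t end_micros = 0;
};

struct StatsFuture {
  std::function<void(CallStats *)> wait_fn;  // empty until a kernel sets it
};

inline int64_t NowMicros() {
  return std::chrono::duration_cast<std::chrono::microseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

// Mirrors the helper added in mace/core/future.h: install a wait_fn that
// reports a zero-duration interval for kernels that already ran synchronously.
inline void SetFutureDefaultWaitFn(StatsFuture *future) {
  if (future != nullptr) {
    future->wait_fn = [](CallStats *stats) {
      if (stats != nullptr) {
        stats->start_micros = NowMicros();
        stats->end_micros = stats->start_micros;
      }
    };
  }
}

// Hypothetical CPU kernel: does its work inline, then fills in the future the
// same way the patched kernels (stack, strided_slice, identity, ...) now do.
void DummyKernel(StatsFuture *future) {
  // ... synchronous computation would happen here ...
  SetFutureDefaultWaitFn(future);
}

int main() {
  StatsFuture future;
  DummyKernel(&future);

  CallStats stats;
  if (future.wait_fn) {
    future.wait_fn(&stats);  // safe even though nothing ran asynchronously
  }
  std::cout << "elapsed micros: " << stats.end_micros - stats.start_micros
            << std::endl;
  return 0;
}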