From ebff986dfa93b473ea329590c9f6947c4147a24a Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Tue, 17 Oct 2017 17:27:19 +0800 Subject: [PATCH] Reformat with google coding style --- mace/core/common.h | 4 +- mace/core/logging.cc | 12 +- mace/core/logging.h | 26 ++-- mace/core/net.cc | 33 +++-- mace/core/net.h | 22 +-- mace/core/proto_utils.cc | 74 +++++----- mace/core/proto_utils.h | 126 ++++++++-------- mace/core/registry.h | 18 +-- mace/core/runtime/opencl/opencl_allocator.cc | 2 +- mace/core/runtime/opencl/opencl_runtime.cc | 4 +- mace/core/serializer.h | 4 +- mace/core/tensor.h | 18 +-- mace/core/testing/test_benchmark.cc | 18 +-- mace/core/testing/test_benchmark.h | 16 +- mace/core/testing/test_benchmark_main.cc | 2 +- mace/core/types.cc | 2 +- mace/core/workspace.cc | 27 ++-- mace/core/workspace.h | 12 +- mace/examples/benchmark_example.cc | 8 +- mace/examples/mace_run.cc | 16 +- mace/kernels/BUILD | 2 +- mace/kernels/addn.h | 4 +- mace/kernels/batch_norm.h | 29 ++-- mace/kernels/channel_shuffle.h | 9 +- mace/kernels/concat.h | 8 +- mace/kernels/conv_2d.h | 24 ++- mace/kernels/conv_pool_2d_util.cc | 13 +- mace/kernels/conv_pool_2d_util.h | 10 +- mace/kernels/depthwise_conv2d.h | 60 ++++---- mace/kernels/global_avg_pooling.h | 4 +- mace/kernels/neon/avg_pooling_neon_2x2.cc | 2 +- mace/kernels/neon/avg_pooling_neon_3x3.cc | 11 +- mace/kernels/neon/batch_norm_neon.cc | 16 +- mace/kernels/neon/conv_2d_neon.cc | 29 ++-- mace/kernels/neon/conv_2d_neon_1x1.cc | 95 ++++++------ mace/kernels/neon/conv_2d_neon_3x3.cc | 145 +++++++++++-------- mace/kernels/neon/conv_2d_neon_5x5.cc | 6 +- mace/kernels/neon/depthwise_conv_neon.cc | 40 ++--- mace/kernels/neon/global_avg_pooling_neon.cc | 6 +- mace/kernels/neon/pooling_neon.cc | 12 +- mace/ops/addn.h | 10 +- mace/ops/addn_benchmark.cc | 2 +- mace/ops/addn_test.cc | 2 +- mace/ops/batch_norm.h | 37 +++-- mace/ops/batch_norm_benchmark.cc | 2 +- mace/ops/batch_norm_test.cc | 6 +- mace/ops/channel_shuffle.h | 4 +- mace/ops/channel_shuffle_benchmark.cc | 27 ++-- mace/ops/channel_shuffle_test.cc | 13 +- mace/ops/concat.h | 30 ++-- mace/ops/concat_benchmark.cc | 5 +- mace/ops/concat_test.cc | 15 +- mace/ops/conv_2d.h | 11 +- mace/ops/conv_2d_benchmark.cc | 19 +-- mace/ops/conv_2d_test.cc | 14 +- mace/ops/conv_pool_2d_base.h | 7 +- mace/ops/depthwise_conv2d.cc | 6 +- mace/ops/depthwise_conv2d.h | 20 +-- mace/ops/depthwise_conv2d_test.cc | 25 ++-- mace/ops/depthwise_conv_2d_benchmark.cc | 38 ++--- mace/ops/global_avg_pooling.h | 2 +- mace/ops/global_avg_pooling_benchmark.cc | 24 ++- mace/ops/global_avg_pooling_test.cc | 20 +-- mace/ops/ops_test_util.h | 57 ++++---- mace/ops/pooling.h | 15 +- mace/ops/pooling_benchmark.cc | 2 +- mace/ops/pooling_test.cc | 35 ++--- mace/ops/relu.h | 14 +- mace/ops/relu_benchmark.cc | 2 +- mace/ops/relu_test.cc | 5 +- mace/ops/resize_bilinear.h | 12 +- mace/ops/resize_bilinear_test.cc | 4 +- mace/proto/BUILD | 6 +- mace/python/tools/BUILD | 1 - mace/tools/benchmark/benchmark_model.cc | 72 ++++----- mace/tools/benchmark/stat_summarizer.cc | 55 ++++--- mace/tools/benchmark/stat_summarizer.h | 36 ++--- mace/utils/command_line_flags.cc | 50 ++++--- mace/utils/command_line_flags.h | 10 +- mace/utils/utils.h | 4 +- 80 files changed, 850 insertions(+), 838 deletions(-) diff --git a/mace/core/common.h b/mace/core/common.h index 75060255..e9b78221 100644 --- a/mace/core/common.h +++ b/mace/core/common.h @@ -26,8 +26,8 @@ typedef int64_t index_t; #ifndef DISABLE_COPY_AND_ASSIGN #define DISABLE_COPY_AND_ASSIGN(classname) \ private: \ - 
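// (A hedged usage sketch for the DISABLE_COPY_AND_ASSIGN macro above; Foo is
//  a hypothetical class. The macro goes at the end of a class body and
//  deletes both copy operations, as SimpleNet and Serializer do later in
//  this patch:
//
//    class Foo {
//     public:
//      Foo() {}
//      DISABLE_COPY_AND_ASSIGN(Foo);
//    };
//  )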
classname(const classname&) = delete; \ - classname& operator=(const classname&) = delete + classname(const classname &) = delete; \ + classname &operator=(const classname &) = delete #endif #define MACE_NOT_IMPLEMENTED MACE_CHECK(false, "not implemented") diff --git a/mace/core/logging.cc b/mace/core/logging.cc index ca479176..ffc359ab 100644 --- a/mace/core/logging.cc +++ b/mace/core/logging.cc @@ -14,7 +14,7 @@ namespace mace { namespace internal { -LogMessage::LogMessage(const char* fname, int line, int severity) +LogMessage::LogMessage(const char *fname, int line, int severity) : fname_(fname), line_(line), severity_(severity) {} #if defined(PLATFORM_POSIX_ANDROID) @@ -43,7 +43,7 @@ void LogMessage::GenerateLogMessage() { } std::stringstream ss; - const char* const partial_name = strrchr(fname_, '/'); + const char *const partial_name = strrchr(fname_, '/'); ss << (partial_name != nullptr ? partial_name + 1 : fname_) << ":" << line_ << " " << str(); __android_log_write(android_log_level, "native", ss.str().c_str()); @@ -69,7 +69,7 @@ void LogMessage::GenerateLogMessage() { namespace { // Parse log level (int64_t) from environment variable (char*) -int64_t LogLevelStrToInt(const char* mace_env_var_val) { +int64_t LogLevelStrToInt(const char *mace_env_var_val) { if (mace_env_var_val == nullptr) { return 0; } @@ -89,12 +89,12 @@ int64_t LogLevelStrToInt(const char* mace_env_var_val) { } int64_t MinLogLevelFromEnv() { - const char* mace_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL"); + const char *mace_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL"); return LogLevelStrToInt(mace_env_var_val); } int64_t MinVLogLevelFromEnv() { - const char* mace_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL"); + const char *mace_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL"); return LogLevelStrToInt(mace_env_var_val); } @@ -111,7 +111,7 @@ int64_t LogMessage::MinVLogLevel() { return min_vlog_level; } -LogMessageFatal::LogMessageFatal(const char* file, int line) +LogMessageFatal::LogMessageFatal(const char *file, int line) : LogMessage(file, line, FATAL) {} LogMessageFatal::~LogMessageFatal() { // abort() ensures we don't return (we promised we would not via diff --git a/mace/core/logging.h b/mace/core/logging.h index f4f427b3..44853279 100644 --- a/mace/core/logging.h +++ b/mace/core/logging.h @@ -23,23 +23,23 @@ namespace internal { using std::string; -inline void MakeStringInternal(std::stringstream& /*ss*/) {} +inline void MakeStringInternal(std::stringstream & /*ss*/) {} template -inline void MakeStringInternal(std::stringstream& ss, const T& t) { +inline void MakeStringInternal(std::stringstream &ss, const T &t) { ss << t; } template -inline void MakeStringInternal(std::stringstream& ss, - const T& t, - const Args&... args) { +inline void MakeStringInternal(std::stringstream &ss, + const T &t, + const Args &... args) { MakeStringInternal(ss, t); MakeStringInternal(ss, args...); } template -string MakeString(const Args&... args) { +string MakeString(const Args &... args) { std::stringstream ss; MakeStringInternal(ss, args...); return ss.str(); @@ -48,7 +48,7 @@ string MakeString(const Args&... args) { template string MakeString(const std::vector &args) { std::stringstream ss; - for (const T& arg: args) { + for (const T &arg : args) { ss << arg << ", "; } return ss.str(); @@ -56,14 +56,14 @@ string MakeString(const std::vector &args) { // Specializations for already-a-string types. 
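// (A hedged usage sketch of the variadic MakeString helper above; the
//  values are hypothetical:
//
//    std::vector<int> shape{1, 3, 224, 224};
//    string msg = MakeString("rank ", shape.size(), " tensor");  // "rank 4 tensor"
//    string dims = MakeString(shape);  // vector overload: "1, 3, 224, 224, "
//
//  note the vector overload keeps a trailing ", " as written.)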
template <> -inline string MakeString(const string& str) { +inline string MakeString(const string &str) { return str; } -inline string MakeString(const char* c_str) { return string(c_str); } +inline string MakeString(const char *c_str) { return string(c_str); } class LogMessage : public std::basic_ostringstream { public: - LogMessage(const char* fname, int line, int severity); + LogMessage(const char *fname, int line, int severity); ~LogMessage(); // Returns the minimum log level for VLOG statements. @@ -75,7 +75,7 @@ class LogMessage : public std::basic_ostringstream { void GenerateLogMessage(); private: - const char* fname_; + const char *fname_; int line_; int severity_; }; @@ -84,7 +84,7 @@ class LogMessage : public std::basic_ostringstream { // logging this message. class LogMessageFatal : public LogMessage { public: - LogMessageFatal(const char* file, int line); + LogMessageFatal(const char *file, int line); ~LogMessageFatal(); }; @@ -136,7 +136,7 @@ class LogMessageFatal : public LogMessage { #endif template -T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) { +T &&CheckNotNull(const char *file, int line, const char *exprtext, T &&t) { if (t == nullptr) { LogMessageFatal(file, line) << string(exprtext); } diff --git a/mace/core/net.cc b/mace/core/net.cc index 80ccc16d..22a2fd11 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -7,18 +7,18 @@ namespace mace { -NetBase::NetBase(const std::shared_ptr& net_def, - Workspace* ws, +NetBase::NetBase(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type) : name_(net_def->name()) {} -SimpleNet::SimpleNet(const std::shared_ptr& net_def, - Workspace* ws, +SimpleNet::SimpleNet(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type) : NetBase(net_def, ws, type) { VLOG(1) << "Constructing SimpleNet " << net_def->name(); for (int idx = 0; idx < net_def->op_size(); ++idx) { - const auto& operator_def = net_def->op(idx); + const auto &operator_def = net_def->op(idx); VLOG(1) << "Creating operator " << operator_def.name() << ":" << operator_def.type(); std::unique_ptr op{nullptr}; @@ -29,26 +29,29 @@ SimpleNet::SimpleNet(const std::shared_ptr& net_def, } } } -bool SimpleNet::Run(RunMetadata* run_metadata) { +bool SimpleNet::Run(RunMetadata *run_metadata) { VLOG(1) << "Running net " << name_; - for (auto& op : operators_) { + for (auto &op : operators_) { VLOG(1) << "Running operator " << op->debug_def().name() << "(" << op->debug_def().type() << ")."; - OperatorStats* op_stats = nullptr; + OperatorStats *op_stats = nullptr; if (run_metadata) { op_stats = run_metadata->add_op_stats(); op_stats->set_operator_name(op->debug_def().name()); op_stats->set_type(op->debug_def().type()); op_stats->set_all_start_micros(NowInMicroSec()); - op_stats->set_op_start_rel_micros(NowInMicroSec() - op_stats->all_start_micros()); + op_stats->set_op_start_rel_micros(NowInMicroSec() - + op_stats->all_start_micros()); } if (!op->Run()) { LOG(ERROR) << "Operator failed: " << ProtoDebugString(op->debug_def()); return false; } if (op_stats) { - op_stats->set_op_end_rel_micros(NowInMicroSec() - op_stats->all_start_micros()); - op_stats->set_all_end_rel_micros(NowInMicroSec() - op_stats->all_start_micros()); + op_stats->set_op_end_rel_micros(NowInMicroSec() - + op_stats->all_start_micros()); + op_stats->set_all_end_rel_micros(NowInMicroSec() - + op_stats->all_start_micros()); } VLOG(1) << "Op " << op->debug_def().name() << " has shape: " << internal::MakeString(op->Output(0)->shape()); @@ -56,15 +59,15 @@ bool 
SimpleNet::Run(RunMetadata* run_metadata) { return true; } -unique_ptr CreateNet(const NetDef& net_def, - Workspace* ws, +unique_ptr CreateNet(const NetDef &net_def, + Workspace *ws, DeviceType type) { std::shared_ptr tmp_net_def(new NetDef(net_def)); return CreateNet(tmp_net_def, ws, type); } -unique_ptr CreateNet(const std::shared_ptr& net_def, - Workspace* ws, +unique_ptr CreateNet(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type) { unique_ptr net(new SimpleNet(net_def, ws, type)); return net; diff --git a/mace/core/net.h b/mace/core/net.h index 14b140fb..541f1b82 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -15,14 +15,14 @@ namespace mace { class NetBase { public: - NetBase(const std::shared_ptr& net_def, - Workspace* ws, + NetBase(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type); virtual ~NetBase() noexcept {} - virtual bool Run(RunMetadata* run_metadata = nullptr) = 0; + virtual bool Run(RunMetadata *run_metadata = nullptr) = 0; - const string& Name() const { return name_; } + const string &Name() const { return name_; } protected: string name_; @@ -32,11 +32,11 @@ class NetBase { class SimpleNet : public NetBase { public: - SimpleNet(const std::shared_ptr& net_def, - Workspace* ws, + SimpleNet(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type); - bool Run(RunMetadata* run_metadata = nullptr) override; + bool Run(RunMetadata *run_metadata = nullptr) override; protected: vector > operators_; @@ -44,11 +44,11 @@ class SimpleNet : public NetBase { DISABLE_COPY_AND_ASSIGN(SimpleNet); }; -unique_ptr CreateNet(const NetDef& net_def, - Workspace* ws, +unique_ptr CreateNet(const NetDef &net_def, + Workspace *ws, DeviceType type); -unique_ptr CreateNet(const std::shared_ptr& net_def, - Workspace* ws, +unique_ptr CreateNet(const std::shared_ptr &net_def, + Workspace *ws, DeviceType type); } // namespace mace diff --git a/mace/core/proto_utils.cc b/mace/core/proto_utils.cc index 7d9c437e..064e9b53 100644 --- a/mace/core/proto_utils.cc +++ b/mace/core/proto_utils.cc @@ -18,7 +18,7 @@ namespace mace { -bool ReadStringFromFile(const char* filename, string* str) { +bool ReadStringFromFile(const char *filename, string *str) { std::ifstream ifs(filename, std::ios::in); if (!ifs) { VLOG(1) << "File cannot be opened: " << filename @@ -33,7 +33,7 @@ bool ReadStringFromFile(const char* filename, string* str) { return true; } -bool WriteStringToFile(const string& str, const char* filename) { +bool WriteStringToFile(const string &str, const char *filename) { std::ofstream ofs(filename, std::ios::out | std::ios::trunc); if (!ofs.is_open()) { VLOG(1) << "File cannot be created: " << filename @@ -54,15 +54,15 @@ bool WriteStringToFile(const string& str, const char* filename) { namespace { class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream { public: - explicit IfstreamInputStream(const string& filename) + explicit IfstreamInputStream(const string &filename) : ifs_(filename.c_str(), std::ios::in | std::ios::binary) {} ~IfstreamInputStream() { ifs_.close(); } - int Read(void* buffer, int size) { + int Read(void *buffer, int size) { if (!ifs_) { return -1; } - ifs_.read(static_cast(buffer), size); + ifs_.read(static_cast(buffer), size); return ifs_.gcount(); } @@ -71,7 +71,7 @@ class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream { }; } // namespace -bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { +bool ReadProtoFromBinaryFile(const char *filename, MessageLite *proto) { 
::google::protobuf::io::CopyingInputStreamAdaptor stream( new IfstreamInputStream(filename)); stream.SetOwnsCopyingStream(true); @@ -82,8 +82,8 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { return proto->ParseFromCodedStream(&coded_stream); } -void WriteProtoToBinaryFile(const MessageLite& /*proto*/, - const char* /*filename*/) { +void WriteProtoToBinaryFile(const MessageLite & /*proto*/, + const char * /*filename*/) { LOG(FATAL) << "Not implemented yet."; } @@ -98,25 +98,25 @@ using ::google::protobuf::io::CodedInputStream; using ::google::protobuf::io::ZeroCopyOutputStream; using ::google::protobuf::io::CodedOutputStream; -bool ReadProtoFromTextFile(const char* filename, Message* proto) { +bool ReadProtoFromTextFile(const char *filename, Message *proto) { int fd = open(filename, O_RDONLY); MACE_CHECK(fd != -1, "File not found: ", filename); - FileInputStream* input = new FileInputStream(fd); + FileInputStream *input = new FileInputStream(fd); bool success = google::protobuf::TextFormat::Parse(input, proto); delete input; close(fd); return success; } -void WriteProtoToTextFile(const Message& proto, const char* filename) { +void WriteProtoToTextFile(const Message &proto, const char *filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); - FileOutputStream* output = new FileOutputStream(fd); + FileOutputStream *output = new FileOutputStream(fd); MACE_CHECK(google::protobuf::TextFormat::Print(proto, output)); delete output; close(fd); } -bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { +bool ReadProtoFromBinaryFile(const char *filename, MessageLite *proto) { #if defined(_MSC_VER) // for MSC compiler binary flag needs to be specified int fd = open(filename, O_RDONLY | O_BINARY); #else @@ -135,7 +135,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { return success; } -void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) { +void WriteProtoToBinaryFile(const MessageLite &proto, const char *filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); MACE_CHECK(fd != -1, "File cannot be created: ", filename, " error number: ", errno); @@ -150,8 +150,8 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) { #endif // MACE_USE_LITE_PROTO -ArgumentHelper::ArgumentHelper(const OperatorDef& def) { - for (auto& arg : def.arg()) { +ArgumentHelper::ArgumentHelper(const OperatorDef &def) { + for (auto &arg : def.arg()) { if (arg_map_.find(arg.name()) != arg_map_.end()) { MACE_CHECK( arg.SerializeAsString() == arg_map_[arg.name()].SerializeAsString(), @@ -167,8 +167,8 @@ ArgumentHelper::ArgumentHelper(const OperatorDef& def) { } } -ArgumentHelper::ArgumentHelper(const NetDef& netdef) { - for (auto& arg : netdef.arg()) { +ArgumentHelper::ArgumentHelper(const NetDef &netdef) { + for (auto &arg : netdef.arg()) { MACE_CHECK(arg_map_.count(arg.name()) == 0, "Duplicated argument name found in net def: ", ProtoDebugString(netdef)); @@ -176,7 +176,7 @@ ArgumentHelper::ArgumentHelper(const NetDef& netdef) { } } -bool ArgumentHelper::HasArgument(const string& name) const { +bool ArgumentHelper::HasArgument(const string &name) const { return arg_map_.count(name); } @@ -184,7 +184,7 @@ namespace { // Helper function to verify that conversion between types won't loose any // significant bit. 
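// (Worked example of the check below: reading an int64 argument with value
//  5000000000 as a 32-bit int truncates, and
//  static_cast<int64_t>(static_cast<int32_t>(5000000000LL)) == 705032704,
//  which differs from 5000000000, so the conversion is rejected; a value
//  like 42 round-trips unchanged and passes.)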
template -bool SupportsLosslessConversion(const InputType& value) { +bool SupportsLosslessConversion(const InputType &value) { return static_cast(static_cast(value)) == value; } } @@ -192,8 +192,8 @@ bool SupportsLosslessConversion(const InputType& value) { #define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname, \ enforce_lossless_conversion) \ template <> \ - T ArgumentHelper::GetSingleArgument(const string& name, \ - const T& default_value) const { \ + T ArgumentHelper::GetSingleArgument(const string &name, \ + const T &default_value) const { \ if (arg_map_.count(name) == 0) { \ VLOG(1) << "Using default parameter value " << default_value \ << " for parameter " << name; \ @@ -211,7 +211,7 @@ bool SupportsLosslessConversion(const InputType& value) { return value; \ } \ template <> \ - bool ArgumentHelper::HasSingleArgumentOfType(const string& name) const { \ + bool ArgumentHelper::HasSingleArgumentOfType(const string &name) const { \ if (arg_map_.count(name) == 0) { \ return false; \ } \ @@ -235,12 +235,12 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false) enforce_lossless_conversion) \ template <> \ vector ArgumentHelper::GetRepeatedArgument( \ - const string& name, const std::vector& default_value) const { \ + const string &name, const std::vector &default_value) const { \ if (arg_map_.count(name) == 0) { \ return default_value; \ } \ vector values; \ - for (const auto& v : arg_map_.at(name).fieldname()) { \ + for (const auto &v : arg_map_.at(name).fieldname()) { \ if (enforce_lossless_conversion) { \ auto supportsConversion = \ SupportsLosslessConversion(v); \ @@ -267,7 +267,7 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings, false) #define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \ template <> \ - Argument MakeArgument(const string& name, const T& value) { \ + Argument MakeArgument(const string &name, const T &value) { \ Argument arg; \ arg.set_name(name); \ arg.set_##fieldname(value); \ @@ -282,7 +282,7 @@ MACE_MAKE_SINGULAR_ARGUMENT(string, s) #undef MACE_MAKE_SINGULAR_ARGUMENT template <> -Argument MakeArgument(const string& name, const MessageLite& value) { +Argument MakeArgument(const string &name, const MessageLite &value) { Argument arg; arg.set_name(name); arg.set_s(value.SerializeAsString()); @@ -291,10 +291,10 @@ Argument MakeArgument(const string& name, const MessageLite& value) { #define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \ template <> \ - Argument MakeArgument(const string& name, const vector& value) { \ + Argument MakeArgument(const string &name, const vector &value) { \ Argument arg; \ arg.set_name(name); \ - for (const auto& v : value) { \ + for (const auto &v : value) { \ arg.add_##fieldname(v); \ } \ return arg; \ @@ -306,8 +306,8 @@ MACE_MAKE_REPEATED_ARGUMENT(int64_t, ints) MACE_MAKE_REPEATED_ARGUMENT(string, strings) #undef MACE_MAKE_REPEATED_ARGUMENT -const Argument& GetArgument(const OperatorDef& def, const string& name) { - for (const Argument& arg : def.arg()) { +const Argument &GetArgument(const OperatorDef &def, const string &name) { + for (const Argument &arg : def.arg()) { if (arg.name() == name) { return arg; } @@ -318,10 +318,10 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) { return std::move(Argument()); } -bool GetFlagArgument(const OperatorDef& def, - const string& name, +bool GetFlagArgument(const OperatorDef &def, + const string &name, bool def_value) { - for (const Argument& arg : def.arg()) { + for (const Argument &arg : def.arg()) { if (arg.name() == name) { MACE_CHECK(arg.has_i(), "Can't parse argument as 
bool: ", ProtoDebugString(arg)); @@ -331,9 +331,9 @@ bool GetFlagArgument(const OperatorDef& def, return def_value; } -Argument* GetMutableArgument(const string& name, +Argument *GetMutableArgument(const string &name, const bool create_if_missing, - OperatorDef* def) { + OperatorDef *def) { for (int i = 0; i < def->arg_size(); ++i) { if (def->arg(i).name() == name) { return def->mutable_arg(i); @@ -341,7 +341,7 @@ Argument* GetMutableArgument(const string& name, } // If no argument of the right name is found... if (create_if_missing) { - Argument* arg = def->add_arg(); + Argument *arg = def->add_arg(); arg->set_name(name); return arg; } else { diff --git a/mace/core/proto_utils.h b/mace/core/proto_utils.h index fb02ab96..90747a41 100644 --- a/mace/core/proto_utils.h +++ b/mace/core/proto_utils.h @@ -21,56 +21,56 @@ using std::string; using ::google::protobuf::MessageLite; // Common interfaces that reads file contents into a string. -bool ReadStringFromFile(const char* filename, string* str); -bool WriteStringToFile(const string& str, const char* filename); +bool ReadStringFromFile(const char *filename, string *str); +bool WriteStringToFile(const string &str, const char *filename); // Common interfaces that are supported by both lite and full protobuf. -bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto); -inline bool ReadProtoFromBinaryFile(const string filename, MessageLite* proto) { +bool ReadProtoFromBinaryFile(const char *filename, MessageLite *proto); +inline bool ReadProtoFromBinaryFile(const string filename, MessageLite *proto) { return ReadProtoFromBinaryFile(filename.c_str(), proto); } -void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename); -inline void WriteProtoToBinaryFile(const MessageLite& proto, - const string& filename) { +void WriteProtoToBinaryFile(const MessageLite &proto, const char *filename); +inline void WriteProtoToBinaryFile(const MessageLite &proto, + const string &filename) { return WriteProtoToBinaryFile(proto, filename.c_str()); } #ifdef MACE_USE_LITE_PROTO -inline string ProtoDebugString(const MessageLite& proto) { +inline string ProtoDebugString(const MessageLite &proto) { return proto.SerializeAsString(); } // Text format MessageLite wrappers: these functions do nothing but just // allowing things to compile. It will produce a runtime error if you are using // MessageLite but still want text support. -inline bool ReadProtoFromTextFile(const char* /*filename*/, - MessageLite* /*proto*/) { +inline bool ReadProtoFromTextFile(const char * /*filename*/, + MessageLite * /*proto*/) { LOG(FATAL) << "If you are running lite version, you should not be " << "calling any text-format protobuffers."; return false; // Just to suppress compiler warning. 
} -inline bool ReadProtoFromTextFile(const string filename, MessageLite* proto) { +inline bool ReadProtoFromTextFile(const string filename, MessageLite *proto) { return ReadProtoFromTextFile(filename.c_str(), proto); } -inline void WriteProtoToTextFile(const MessageLite& /*proto*/, - const char* /*filename*/) { +inline void WriteProtoToTextFile(const MessageLite & /*proto*/, + const char * /*filename*/) { LOG(FATAL) << "If you are running lite version, you should not be " << "calling any text-format protobuffers."; } -inline void WriteProtoToTextFile(const MessageLite& proto, - const string& filename) { +inline void WriteProtoToTextFile(const MessageLite &proto, + const string &filename) { return WriteProtoToTextFile(proto, filename.c_str()); } -inline bool ReadProtoFromFile(const char* filename, MessageLite* proto) { +inline bool ReadProtoFromFile(const char *filename, MessageLite *proto) { return (ReadProtoFromBinaryFile(filename, proto) || ReadProtoFromTextFile(filename, proto)); } -inline bool ReadProtoFromFile(const string& filename, MessageLite* proto) { +inline bool ReadProtoFromFile(const string &filename, MessageLite *proto) { return ReadProtoFromFile(filename.c_str(), proto); } @@ -78,27 +78,27 @@ inline bool ReadProtoFromFile(const string& filename, MessageLite* proto) { using ::google::protobuf::Message; -inline string ProtoDebugString(const Message& proto) { +inline string ProtoDebugString(const Message &proto) { return proto.ShortDebugString(); } -bool ReadProtoFromTextFile(const char* filename, Message* proto); -inline bool ReadProtoFromTextFile(const string filename, Message* proto) { +bool ReadProtoFromTextFile(const char *filename, Message *proto); +inline bool ReadProtoFromTextFile(const string filename, Message *proto) { return ReadProtoFromTextFile(filename.c_str(), proto); } -void WriteProtoToTextFile(const Message& proto, const char* filename); -inline void WriteProtoToTextFile(const Message& proto, const string& filename) { +void WriteProtoToTextFile(const Message &proto, const char *filename); +inline void WriteProtoToTextFile(const Message &proto, const string &filename) { return WriteProtoToTextFile(proto, filename.c_str()); } // Read Proto from a file, letting the code figure out if it is text or binary. 
-inline bool ReadProtoFromFile(const char* filename, Message* proto) { +inline bool ReadProtoFromFile(const char *filename, Message *proto) { return (ReadProtoFromBinaryFile(filename, proto) || ReadProtoFromTextFile(filename, proto)); } -inline bool ReadProtoFromFile(const string& filename, Message* proto) { +inline bool ReadProtoFromFile(const string &filename, Message *proto) { return ReadProtoFromFile(filename.c_str(), proto); } @@ -107,21 +107,21 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) { template , class IterableOutputs = std::initializer_list, class IterableArgs = std::initializer_list> -OperatorDef CreateOperatorDef(const string& type, - const string& name, - const IterableInputs& inputs, - const IterableOutputs& outputs, - const IterableArgs& args) { +OperatorDef CreateOperatorDef(const string &type, + const string &name, + const IterableInputs &inputs, + const IterableOutputs &outputs, + const IterableArgs &args) { OperatorDef def; def.set_type(type); def.set_name(name); - for (const string& in : inputs) { + for (const string &in : inputs) { def.add_input(in); } - for (const string& out : outputs) { + for (const string &out : outputs) { def.add_output(out); } - for (const Argument& arg : args) { + for (const Argument &arg : args) { def.add_arg()->CopyFrom(arg); } return def; @@ -131,10 +131,10 @@ OperatorDef CreateOperatorDef(const string& type, // to specify args. template , class IterableOutputs = std::initializer_list> -inline OperatorDef CreateOperatorDef(const string& type, - const string& name, - const IterableInputs& inputs, - const IterableOutputs& outputs) { +inline OperatorDef CreateOperatorDef(const string &type, + const string &name, + const IterableInputs &inputs, + const IterableOutputs &outputs) { return CreateOperatorDef(type, name, inputs, outputs, std::vector()); } @@ -150,56 +150,56 @@ inline OperatorDef CreateOperatorDef(const string& type, class ArgumentHelper { public: template - static bool HasArgument(const Def& def, const string& name) { + static bool HasArgument(const Def &def, const string &name) { return ArgumentHelper(def).HasArgument(name); } template - static T GetSingleArgument(const Def& def, - const string& name, - const T& default_value) { + static T GetSingleArgument(const Def &def, + const string &name, + const T &default_value) { return ArgumentHelper(def).GetSingleArgument(name, default_value); } template - static bool HasSingleArgumentOfType(const Def& def, const string& name) { + static bool HasSingleArgumentOfType(const Def &def, const string &name) { return ArgumentHelper(def).HasSingleArgumentOfType(name); } template static vector GetRepeatedArgument( - const Def& def, - const string& name, - const std::vector& default_value = std::vector()) { + const Def &def, + const string &name, + const std::vector &default_value = std::vector()) { return ArgumentHelper(def).GetRepeatedArgument(name, default_value); } template - static MessageType GetMessageArgument(const Def& def, const string& name) { + static MessageType GetMessageArgument(const Def &def, const string &name) { return ArgumentHelper(def).GetMessageArgument(name); } template - static vector GetRepeatedMessageArgument(const Def& def, - const string& name) { + static vector GetRepeatedMessageArgument(const Def &def, + const string &name) { return ArgumentHelper(def).GetRepeatedMessageArgument(name); } - explicit ArgumentHelper(const OperatorDef& def); - explicit ArgumentHelper(const NetDef& netdef); - bool HasArgument(const string& name) const; + 
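// (Hedged construction sketch using CreateOperatorDef and AddArgument from
//  this header; the op and tensor names are hypothetical:
//
//    OperatorDef op = CreateOperatorDef("Conv2D", "conv1",
//                                       {"input", "filter", "bias"},
//                                       {"output"});
//    AddArgument("padding", 1, &op);
//    int padding = ArgumentHelper::GetSingleArgument(op, "padding", 0);
//  )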
explicit ArgumentHelper(const OperatorDef &def); + explicit ArgumentHelper(const NetDef &netdef); + bool HasArgument(const string &name) const; template - T GetSingleArgument(const string& name, const T& default_value) const; + T GetSingleArgument(const string &name, const T &default_value) const; template - bool HasSingleArgumentOfType(const string& name) const; + bool HasSingleArgumentOfType(const string &name) const; template vector GetRepeatedArgument( - const string& name, - const std::vector& default_value = std::vector()) const; + const string &name, + const std::vector &default_value = std::vector()) const; template - MessageType GetMessageArgument(const string& name) const { + MessageType GetMessageArgument(const string &name) const { MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name); MessageType message; if (arg_map_.at(name).has_s()) { @@ -212,7 +212,7 @@ class ArgumentHelper { } template - vector GetRepeatedMessageArgument(const string& name) const { + vector GetRepeatedMessageArgument(const string &name) const { MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name); vector messages(arg_map_.at(name).strings_size()); for (int i = 0; i < messages.size(); ++i) { @@ -226,20 +226,20 @@ class ArgumentHelper { std::map arg_map_; }; -const Argument& GetArgument(const OperatorDef& def, const string& name); -bool GetFlagArgument(const OperatorDef& def, - const string& name, +const Argument &GetArgument(const OperatorDef &def, const string &name); +bool GetFlagArgument(const OperatorDef &def, + const string &name, bool def_value = false); -Argument* GetMutableArgument(const string& name, +Argument *GetMutableArgument(const string &name, const bool create_if_missing, - OperatorDef* def); + OperatorDef *def); template -Argument MakeArgument(const string& name, const T& value); +Argument MakeArgument(const string &name, const T &value); template -inline void AddArgument(const string& name, const T& value, OperatorDef* def) { +inline void AddArgument(const string &name, const T &value, OperatorDef *def) { GetMutableArgument(name, true, def)->CopyFrom(MakeArgument(name, value)); } diff --git a/mace/core/registry.h b/mace/core/registry.h index 1295128c..9a61ba12 100644 --- a/mace/core/registry.h +++ b/mace/core/registry.h @@ -16,15 +16,15 @@ class Registry { Registry() : registry_() {} - void Register(const SrcType& key, Creator creator) { + void Register(const SrcType &key, Creator creator) { std::lock_guard lock(register_mutex_); MACE_CHECK(registry_.count(key) == 0, "Key already registered."); registry_[key] = creator; } - inline bool Has(const SrcType& key) { return registry_.count(key) != 0; } + inline bool Has(const SrcType &key) { return registry_.count(key) != 0; } - unique_ptr Create(const SrcType& key, Args... args) { + unique_ptr Create(const SrcType &key, Args... args) { if (registry_.count(key) == 0) { LOG(FATAL) << "Key not registered: " << key; } @@ -36,7 +36,7 @@ class Registry { */ vector Keys() { vector keys; - for (const auto& it : registry_) { + for (const auto &it : registry_) { keys.push_back(it.first); } return keys; @@ -52,8 +52,8 @@ class Registry { template class Registerer { public: - Registerer(const SrcType& key, - Registry* registry, + Registerer(const SrcType &key, + Registry *registry, typename Registry::Creator creator) { registry->Register(key, creator); } @@ -73,13 +73,13 @@ class Registerer { #endif #define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) 
\ - Registry* RegistryName(); \ + Registry *RegistryName(); \ typedef Registerer \ Registerer##RegistryName; #define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ - Registry* RegistryName() { \ - static Registry* registry = \ + Registry *RegistryName() { \ + static Registry *registry = \ new Registry(); \ return registry; \ } diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 6ad3fb1f..b501e42b 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -3,8 +3,8 @@ // #include "mace/core/runtime/opencl/opencl_allocator.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/cl2.hpp" +#include "mace/core/runtime/opencl/opencl_runtime.h" namespace mace { diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 435a9f27..0e1b1bfd 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -30,7 +30,9 @@ bool ReadSourceFile(const char *filename, std::string *content) { return true; } -bool BuildProgram(OpenCLRuntime *runtime, const char *filename, cl::Program *program) { +bool BuildProgram(OpenCLRuntime *runtime, + const char *filename, + cl::Program *program) { MACE_CHECK_NOTNULL(filename); MACE_CHECK_NOTNULL(program); diff --git a/mace/core/serializer.h b/mace/core/serializer.h index f9966a5a..107d9f4e 100644 --- a/mace/core/serializer.h +++ b/mace/core/serializer.h @@ -16,9 +16,9 @@ class Serializer { Serializer() {} ~Serializer() {} - unique_ptr Serialize(const Tensor& tensor, const string& name); + unique_ptr Serialize(const Tensor &tensor, const string &name); - unique_ptr Deserialize(const TensorProto& proto, DeviceType type); + unique_ptr Deserialize(const TensorProto &proto, DeviceType type); DISABLE_COPY_AND_ASSIGN(Serializer); }; diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 0147f50e..6de5a8ac 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -202,15 +202,15 @@ class Tensor { } class MappingGuard { - public: - MappingGuard(Tensor *tensor) : tensor_(tensor) { - MACE_ASSERT(tensor_ != nullptr); - tensor_->Map(); - } - ~MappingGuard() { tensor_->Unmap(); } - - private: - Tensor *tensor_; + public: + MappingGuard(Tensor *tensor) : tensor_(tensor) { + MACE_ASSERT(tensor_ != nullptr); + tensor_->Map(); + } + ~MappingGuard() { tensor_->Unmap(); } + + private: + Tensor *tensor_; }; private: diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index 66078911..cf32aa6e 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -16,36 +16,36 @@ namespace mace { namespace testing { -static std::vector* all_benchmarks = nullptr; +static std::vector *all_benchmarks = nullptr; static std::string label; static int64_t bytes_processed; static int64_t items_processed; static int64_t accum_time = 0; static int64_t start_time = 0; -Benchmark::Benchmark(const char* name, void (*fn)(int)) +Benchmark::Benchmark(const char *name, void (*fn)(int)) : name_(name), num_args_(0), fn0_(fn) { args_.push_back(std::make_pair(-1, -1)); Register(); } -Benchmark::Benchmark(const char* name, void (*fn)(int, int)) +Benchmark::Benchmark(const char *name, void (*fn)(int, int)) : name_(name), num_args_(1), fn1_(fn) { Register(); } -Benchmark::Benchmark(const char* name, void (*fn)(int, int, int)) +Benchmark::Benchmark(const char *name, void (*fn)(int, int, int)) : 
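// (Registration idiom: each Benchmark constructor calls Register(), which
//  appends the instance to the global all_benchmarks list, and the BENCHMARK
//  macro below creates a static instance so registration runs before main().
//  A hedged usage sketch with a hypothetical benchmark:
//
//    static void BM_Copy(int iters, int n) {
//      while (iters--) { /* copy n floats */ }
//    }
//    BENCHMARK(BM_Copy)->Arg(1024);  // (int, int) form, so Arg() is allowed
//  )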
name_(name), num_args_(2), fn2_(fn) { Register(); } -Benchmark* Benchmark::Arg(int x) { +Benchmark *Benchmark::Arg(int x) { MACE_CHECK(num_args_ == 1); args_.push_back(std::make_pair(x, -1)); return this; } -Benchmark* Benchmark::ArgPair(int x, int y) { +Benchmark *Benchmark::ArgPair(int x, int y) { MACE_CHECK(num_args_ == 2); args_.push_back(std::make_pair(x, y)); return this; @@ -54,7 +54,7 @@ Benchmark* Benchmark::ArgPair(int x, int y) { // Run all benchmarks void Benchmark::Run() { Run("all"); } -void Benchmark::Run(const char* pattern) { +void Benchmark::Run(const char *pattern) { if (!all_benchmarks) return; if (std::string(pattern) == "all") { @@ -117,11 +117,11 @@ void Benchmark::Run(const char* pattern) { } void Benchmark::Register() { - if (!all_benchmarks) all_benchmarks = new std::vector; + if (!all_benchmarks) all_benchmarks = new std::vector; all_benchmarks->push_back(this); } -void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) { +void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) { static const int64_t kMinIters = 10; static const int64_t kMaxIters = 1000000000; static const double kMinTime = 0.5; diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h index 25d12459..01236b15 100644 --- a/mace/core/testing/test_benchmark.h +++ b/mace/core/testing/test_benchmark.h @@ -13,7 +13,7 @@ #define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c #define BENCHMARK(n) \ - static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT( \ + static ::mace::testing::Benchmark *MACE_BENCHMARK_CONCAT( \ __benchmark_, n, __LINE__) = (new ::mace::testing::Benchmark(#n, (n))) namespace mace { @@ -21,14 +21,14 @@ namespace testing { class Benchmark { public: - Benchmark(const char* name, void (*fn)(int)); - Benchmark(const char* name, void (*fn)(int, int)); - Benchmark(const char* name, void (*fn)(int, int, int)); - Benchmark* Arg(int x); - Benchmark* ArgPair(int x, int y); + Benchmark(const char *name, void (*fn)(int)); + Benchmark(const char *name, void (*fn)(int, int)); + Benchmark(const char *name, void (*fn)(int, int, int)); + Benchmark *Arg(int x); + Benchmark *ArgPair(int x, int y); static void Run(); - static void Run(const char* pattern); + static void Run(const char *pattern); private: string name_; @@ -39,7 +39,7 @@ class Benchmark { void (*fn2_)(int, int, int) = nullptr; void Register(); - void Run(int arg1, int arg2, int* run_count, double* run_seconds); + void Run(int arg1, int arg2, int *run_count, double *run_seconds); }; void RunBenchmarks(); diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index cc0c0172..ae8a7a2e 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -6,7 +6,7 @@ #include "mace/core/testing/test_benchmark.h" -int main(int argc, char** argv) { +int main(int argc, char **argv) { std::cout << "Running main() from test_main.cc\n"; // TODO Use gflags diff --git a/mace/core/types.cc b/mace/core/types.cc index bc37f6d2..8ad8fba9 100644 --- a/mace/core/types.cc +++ b/mace/core/types.cc @@ -23,4 +23,4 @@ bool DataTypeCanUseMemcpy(DataType dt) { } } -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index a421770b..a0eab8bf 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -10,14 +10,14 @@ namespace mace { vector Workspace::Tensors() const { vector names; - for (auto& entry : 
tensor_map_) { + for (auto &entry : tensor_map_) { names.push_back(entry.first); } return names; } -Tensor* Workspace::CreateTensor(const string& name, - Allocator* alloc, +Tensor *Workspace::CreateTensor(const string &name, + Allocator *alloc, DataType type) { if (HasTensor(name)) { VLOG(1) << "Tensor " << name << " already exists. Skipping."; @@ -28,7 +28,7 @@ Tensor* Workspace::CreateTensor(const string& name, return GetTensor(name); } -bool Workspace::RemoveTensor(const string& name) { +bool Workspace::RemoveTensor(const string &name) { auto it = tensor_map_.find(name); if (it != tensor_map_.end()) { VLOG(1) << "Removing blob " << name << " from this workspace."; @@ -38,7 +38,7 @@ bool Workspace::RemoveTensor(const string& name) { return false; } -const Tensor* Workspace::GetTensor(const string& name) const { +const Tensor *Workspace::GetTensor(const string &name) const { if (tensor_map_.count(name)) { return tensor_map_.at(name).get(); } else { @@ -47,18 +47,17 @@ const Tensor* Workspace::GetTensor(const string& name) const { return nullptr; } -Tensor* Workspace::GetTensor(const string& name) { - return const_cast( - static_cast(this)->GetTensor(name)); +Tensor *Workspace::GetTensor(const string &name) { + return const_cast( + static_cast(this)->GetTensor(name)); } -void Workspace::LoadModelTensor(const NetDef& net_def, DeviceType type) { +void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { Serializer serializer; - for (auto& tensor_proto : net_def.tensors()) { - - VLOG(1) << "Load tensor: " << tensor_proto.name() - << " has shape: " << internal::MakeString(vector( - tensor_proto.dims().begin(), tensor_proto.dims().end())); + for (auto &tensor_proto : net_def.tensors()) { + VLOG(1) << "Load tensor: " << tensor_proto.name() << " has shape: " + << internal::MakeString(vector(tensor_proto.dims().begin(), + tensor_proto.dims().end())); tensor_map_[tensor_proto.name()] = serializer.Deserialize(tensor_proto, type); } diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 5d87abf7..291bc059 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -19,19 +19,19 @@ class Workspace { vector Tensors() const; - Tensor* CreateTensor(const string& name, Allocator* alloc, DataType type); + Tensor *CreateTensor(const string &name, Allocator *alloc, DataType type); - bool RemoveTensor(const string& name); + bool RemoveTensor(const string &name); - inline bool HasTensor(const string& name) const { + inline bool HasTensor(const string &name) const { return tensor_map_.count(name); } - const Tensor* GetTensor(const string& name) const; + const Tensor *GetTensor(const string &name) const; - Tensor* GetTensor(const string& name); + Tensor *GetTensor(const string &name); - void LoadModelTensor(const NetDef& net_def, DeviceType type); + void LoadModelTensor(const NetDef &net_def, DeviceType type); private: TensorMap tensor_map_; diff --git a/mace/examples/benchmark_example.cc b/mace/examples/benchmark_example.cc index 4fa34bea..93d1bd1a 100644 --- a/mace/examples/benchmark_example.cc +++ b/mace/examples/benchmark_example.cc @@ -10,8 +10,8 @@ static void foo(int iters) { mace::testing::ItemsProcessed(tot); mace::testing::BytesProcessed(tot * (sizeof(float))); - float* inp = new float[N]; - float* out = new float[N]; + float *inp = new float[N]; + float *out = new float[N]; while (iters--) { for (int i = 0; i < N; i++) { @@ -29,8 +29,8 @@ static void bar(int iters, int n) { mace::testing::ItemsProcessed(tot); mace::testing::BytesProcessed(tot * (sizeof(float))); - 
float* inp = new float[n]; - float* out = new float[n]; + float *inp = new float[n]; + float *out = new float[n]; while (iters--) { for (int i = 0; i < n; i++) { diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index fcd88ae1..102f862e 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -12,8 +12,8 @@ * --output_file=mace.out \ * --device=NEON */ -#include #include +#include #include "mace/core/net.h" #include "mace/utils/command_line_flags.h" @@ -83,12 +83,11 @@ int main(int argc, char **argv) { Workspace ws; ws.LoadModelTensor(net_def, DeviceType::CPU); - Tensor *input_tensor = ws.CreateTensor(input_node + ":0", - cpu_allocator(), DT_FLOAT); + Tensor *input_tensor = + ws.CreateTensor(input_node + ":0", cpu_allocator(), DT_FLOAT); input_tensor->Resize(shape); float *input_data = input_tensor->mutable_data(); - // load input ifstream in_file(input_file, ios::in | ios::binary); in_file.read(reinterpret_cast(input_data), @@ -112,14 +111,17 @@ int main(int argc, char **argv) { net->Run(); } gettimeofday(&tv2, NULL); - cout << "avg duration: " << ((tv2.tv_sec - tv1.tv_sec) * 1000 - + (tv2.tv_usec - tv1.tv_usec) / 1000) / round << endl; + cout << "avg duration: " + << ((tv2.tv_sec - tv1.tv_sec) * 1000 + + (tv2.tv_usec - tv1.tv_usec) / 1000) / + round + << endl; // save output const Tensor *output = ws.GetTensor(output_node + ":0"); ofstream out_file(output_file, ios::binary); - out_file.write((const char *) (output->data()), + out_file.write((const char *)(output->data()), output->size() * sizeof(float)); out_file.flush(); out_file.close(); diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index 92d68761..8f86e6eb 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -20,7 +20,7 @@ cc_library( linkopts = if_android(["-lm"]), deps = [ "//mace/core", - "//mace/utils:utils", + "//mace/utils", ], ) diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 3e5845b3..4e30d314 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -12,7 +12,7 @@ namespace kernels { template struct AddNFunctor { - void operator()(const vector& inputs, T* output, index_t size) { + void operator()(const vector &inputs, T *output, index_t size) { memset(output, 0, size * sizeof(T)); int n = inputs.size(); for (int i = 0; i < n; ++i) { @@ -25,7 +25,7 @@ struct AddNFunctor { template <> void AddNFunctor::operator()( - const vector& inputs, float* output, index_t size); + const vector &inputs, float *output, index_t size); } // namespace kernels } // namespace mace diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index be50df0f..5c838be4 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -13,17 +13,16 @@ namespace kernels { template struct BatchNormFunctor { - - void operator()(const T* input, - const T* scale, - const T* offset, - const T* mean, - const T* var, + void operator()(const T *input, + const T *scale, + const T *offset, + const T *mean, + const T *var, const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, - T* output) { + T *output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . 
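// (The diff context truncates this comment; a hedged completion consistent
//  with the new_scale / new_offset computation in the loop below:
//    new_scale  = scale / sqrt(var + variance_epsilon)
//    new_offset = offset - mean * new_scale
//    Y          = new_scale * X + new_offset
//  applied per channel over each sample.)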
// The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + @@ -40,8 +39,8 @@ struct BatchNormFunctor { index_t pos = c * sample_size; for (index_t i = 0; i < n; ++i) { - const T* input_sample_ptr = input + pos; - T* output_sample_ptr = output + pos; + const T *input_sample_ptr = input + pos; + T *output_sample_ptr = output + pos; for (index_t j = 0; j < sample_size; ++j) { output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset; } @@ -53,16 +52,16 @@ struct BatchNormFunctor { template <> void BatchNormFunctor::operator()( - const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, + const float *input, + const float *scale, + const float *offset, + const float *mean, + const float *var, const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, - float* output); + float *output); } // namepsace kernels } // namespace mace diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index 49b12661..b4829a4e 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -10,11 +10,10 @@ namespace mace { namespace kernels { -template +template class ChannelShuffleFunctor { public: - ChannelShuffleFunctor(const int group) - : group_(group) {} + ChannelShuffleFunctor(const int group) : group_(group) {} void operator()(const T *input, const index_t *input_shape, T *output) { index_t batch = input_shape[0]; @@ -28,8 +27,8 @@ class ChannelShuffleFunctor { for (int b = 0; b < batch; ++b) { for (int c = 0; c < channels_of_group; ++c) { for (int g = 0; g < group_; ++g) { - index_t input_offset = (b * channels + g * channels_of_group + c) * - image_size; + index_t input_offset = + (b * channels + g * channels_of_group + c) * image_size; index_t output_offset = (b * channels + c * group_ + g) * image_size; memcpy(output + output_offset, input + input_offset, image_size * sizeof(T)); diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 0a294166..807fda0a 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -5,13 +5,13 @@ #ifndef MACE_KERNELS_CONCAT_H_ #define MACE_KERNELS_CONCAT_H_ -#include "mace/proto/mace.pb.h" #include "mace/core/common.h" #include "mace/core/types.h" +#include "mace/proto/mace.pb.h" namespace mace { namespace kernels { -template +template struct ConcatFunctor { void operator()(std::vector &input_list, const index_t inner_dim, @@ -35,6 +35,6 @@ struct ConcatFunctor { }; } // namepsace kernels -} // namespace mace +} // namespace mace -#endif // MACE_KERNELS_CONCAT_H_ +#endif // MACE_KERNELS_CONCAT_H_ diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index fa568684..d520baf7 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -11,15 +11,13 @@ namespace mace { namespace kernels { -template +template struct Conv2dFunctor { Conv2dFunctor() {} Conv2dFunctor(const int *strides, const std::vector &paddings, - const int *dilations) : - strides_(strides), - paddings_(paddings), - dilations_(dilations) {} + const int *dilations) + : strides_(strides), paddings_(paddings), dilations_(dilations) {} void operator()(const T *input, // NCHW const index_t *input_shape, @@ -66,7 +64,7 @@ struct Conv2dFunctor { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { index_t offset = n * channels * height * width + - c * height * width + h * width + w; + c * height * width + h * width + w; output[offset] = bias_channel; T sum = 0; const T *filter_ptr = filter 
+ c * kernel_size; @@ -78,7 +76,7 @@ struct Conv2dFunctor { if (inh < 0 || inh >= input_height || inw < 0 || inw >= input_width) { MACE_CHECK(inh >= padded_h_start && inh < padded_h_stop && - inw >= padded_w_start && inw < padded_w_stop, + inw >= padded_w_start && inw < padded_w_stop, "Out of range read from input: ", inh, ", ", inw); // else padding with 0: @@ -86,8 +84,8 @@ struct Conv2dFunctor { } else { index_t input_offset = n * input_channels * input_height * input_width + - inc * input_height * input_width + inh * input_width + - inw; + inc * input_height * input_width + inh * input_width + + inw; sum += input[input_offset] * *filter_ptr; } ++filter_ptr; @@ -101,12 +99,12 @@ struct Conv2dFunctor { } } - const int *strides_; // [stride_h, stride_w] - std::vector paddings_; // [padding_h, padding_w] - const int *dilations_; // [dilation_h, dilation_w] + const int *strides_; // [stride_h, stride_w] + std::vector paddings_; // [padding_h, padding_w] + const int *dilations_; // [dilation_h, dilation_w] }; -template<> +template <> void Conv2dFunctor::operator()( const float *input, const index_t *input_shape, diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index eb371d66..d979ee44 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -72,16 +72,15 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW } void CalPaddingSize(const index_t *input_shape, // NCHW - const index_t *filter_shape, // OIHW - const int *dilations, - const int *strides, - Padding padding, - int *padding_size) { - + const index_t *filter_shape, // OIHW + const int *dilations, + const int *strides, + Padding padding, + int *padding_size) { MACE_CHECK(dilations[0] > 0 && dilations[1] > 0, "Invalid dilations, must >= 1"); MACE_CHECK((dilations[0] == 1 || strides[0] == 1) && - (dilations[1] == 1 || strides[1] == 1), + (dilations[1] == 1 || strides[1] == 1), "If dilations > 1, strides should be 1"); MACE_CHECK_NOTNULL(padding_size); diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/kernels/conv_pool_2d_util.h index 26f2ab37..0424f43d 100644 --- a/mace/kernels/conv_pool_2d_util.h +++ b/mace/kernels/conv_pool_2d_util.h @@ -26,11 +26,11 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW int *padding_size); void CalPaddingSize(const index_t *input_shape, // NCHW - const index_t *filter_shape, // OIHW - const int *dilations, - const int *strides, - Padding padding, - int *padding_size); + const index_t *filter_shape, // OIHW + const int *dilations, + const int *strides, + Padding padding, + int *padding_size); void ConstructInputWithPadding(const float *input, const index_t *input_shape, diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index 276287bb..dab8cebb 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -5,29 +5,27 @@ #ifndef MACE_KERNELS_DEPTHWISE_CONV_H_ #define MACE_KERNELS_DEPTHWISE_CONV_H_ -#include "mace/proto/mace.pb.h" #include "mace/core/common.h" #include "mace/kernels/conv_pool_2d_util.h" +#include "mace/proto/mace.pb.h" namespace mace { namespace kernels { -template +template struct DepthwiseConv2dFunctor { DepthwiseConv2dFunctor() {} DepthwiseConv2dFunctor(const int *strides, const std::vector &paddings, - const int *dilations) : - strides_(strides), - paddings_(paddings), - dilations_(dilations) {} + const int *dilations) + : strides_(strides), paddings_(paddings), dilations_(dilations) {} - void operator()(const T *input, // NCHW + void 
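// (For reference, the output size used throughout these kernels follows the
//  usual relation, consistent with the dilation/stride checks in
//  conv_pool_2d_util.cc above; a hedged restatement:
//    dilated_kernel = (kernel - 1) * dilation + 1
//    output = (input + 2 * padding - dilated_kernel) / stride + 1
//  e.g. input = 7, kernel = 3, stride = 2, padding = 1, dilation = 1
//  gives output = (7 + 2 - 3) / 2 + 1 = 4.)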
operator()(const T *input, // NCHW const index_t *input_shape, - const T *filter, // c_out, c_in, kernel_h, kernel_w + const T *filter, // c_out, c_in, kernel_h, kernel_w const index_t *filter_shape, - const T *bias, // c_out - T *output, // NCHW + const T *bias, // c_out + T *output, // NCHW const index_t *output_shape) { MACE_CHECK_NOTNULL(output); @@ -68,7 +66,7 @@ struct DepthwiseConv2dFunctor { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { index_t offset = n * channels * height * width + - c * height * width + h * width + w; + c * height * width + h * width + w; output[offset] = bias_channel; T sum = 0; const T *filter_ptr = filter + c * kernel_size; @@ -79,16 +77,15 @@ struct DepthwiseConv2dFunctor { if (inh < 0 || inh >= input_height || inw < 0 || inw >= input_width) { MACE_CHECK(inh >= padded_h_start && inh < padded_h_stop && - inw >= padded_w_start && inw < padded_w_stop, - "Out of range read from input: ", inh, ", ", - inw); + inw >= padded_w_start && inw < padded_w_stop, + "Out of range read from input: ", inh, ", ", inw); // else padding with 0: // sum += 0; } else { index_t input_offset = n * input_channels * input_height * input_width + - (c / multiplier) * input_height * input_width + inh * input_width + - inw; + (c / multiplier) * input_height * input_width + + inh * input_width + inw; sum += input[input_offset] * *filter_ptr; } ++filter_ptr; @@ -101,20 +98,21 @@ struct DepthwiseConv2dFunctor { } } - const int *strides_; // [stride_h, stride_w] - std::vector paddings_; // [padding_h, padding_w] - const int *dilations_; // [dilation_h, dilation_w] + const int *strides_; // [stride_h, stride_w] + std::vector paddings_; // [padding_h, padding_w] + const int *dilations_; // [dilation_h, dilation_w] }; -template<> -void DepthwiseConv2dFunctor::operator()(const float *input, - const index_t *input_shape, - const float *filter, - const index_t *filter_shape, - const float *bias, - float *output, - const index_t *output_shape); -} // namespace kernels -} // namespace mace - -#endif // MACE_KERNELS_DEPTHWISE_CONV_H_ +template <> +void DepthwiseConv2dFunctor::operator()( + const float *input, + const index_t *input_shape, + const float *filter, + const index_t *filter_shape, + const float *bias, + float *output, + const index_t *output_shape); +} // namespace kernels +} // namespace mace + +#endif // MACE_KERNELS_DEPTHWISE_CONV_H_ diff --git a/mace/kernels/global_avg_pooling.h b/mace/kernels/global_avg_pooling.h index ed96c66b..f321bcbf 100644 --- a/mace/kernels/global_avg_pooling.h +++ b/mace/kernels/global_avg_pooling.h @@ -35,9 +35,7 @@ struct GlobalAvgPoolingFunctor { template <> void GlobalAvgPoolingFunctor::operator()( - const float *input, - const index_t *input_shape, - float *output); + const float *input, const index_t *input_shape, float *output); } // namespace kernels } // namespace mace diff --git a/mace/kernels/neon/avg_pooling_neon_2x2.cc b/mace/kernels/neon/avg_pooling_neon_2x2.cc index 586e3f4a..a5c2a0ac 100644 --- a/mace/kernels/neon/avg_pooling_neon_2x2.cc +++ b/mace/kernels/neon/avg_pooling_neon_2x2.cc @@ -45,7 +45,7 @@ void PoolingAvgNeonK2x2S2x2(const float *input, int w = 0; int num_vectors = 0; if (!((h == 0 && padding_top > 0) || - (h == out_height - 1 && padding_bottom > 0))) { + (h == out_height - 1 && padding_bottom > 0))) { r0 = input + input_offset + (h * 2 - padding_top) * in_width; r1 = r0 + in_width; if (padding_left > 0) { diff --git a/mace/kernels/neon/avg_pooling_neon_3x3.cc b/mace/kernels/neon/avg_pooling_neon_3x3.cc index 
3c977f59..e50f454c 100644 --- a/mace/kernels/neon/avg_pooling_neon_3x3.cc +++ b/mace/kernels/neon/avg_pooling_neon_3x3.cc @@ -33,7 +33,7 @@ void PoolingAvgNeonK3x3S2x2(const float *input, int out_image_size = out_height * out_width; index_t input_offset = 0; index_t output_offset = 0; - float avg_factors[4] = {1.0/9.0, 1.0/9.0, 1.0/9.0, 1.0/9.0}; + float avg_factors[4] = {1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0}; #pragma omp parallel for collapse(2) for (int b = 0; b < batch; ++b) { @@ -45,7 +45,7 @@ void PoolingAvgNeonK3x3S2x2(const float *input, int num_vectors = 0; const float *r0, *r1, *r2; if (!((h == 0 && padding_top > 0) || - (h == out_height - 1 && padding_bottom > 0))) { + (h == out_height - 1 && padding_bottom > 0))) { r0 = input + input_offset + (h * 2 - padding_top) * in_width; r1 = r0 + in_width; r2 = r1 + in_width; @@ -147,7 +147,7 @@ void PoolingAvgNeonK3x3S2x2Padded(const float *input, int out_image_size = out_height * out_width; index_t input_offset = 0; index_t output_offset = 0; - float avg_factors[4] = {1.0/9.0, 1.0/9.0, 1.0/9.0, 1.0/9.0}; + float avg_factors[4] = {1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0}; #pragma omp parallel for collapse(2) for (int b = 0; b < batch; ++b) { @@ -200,8 +200,9 @@ void PoolingAvgNeonK3x3S2x2Padded(const float *input, } for (; remain > 0; remain--) { - *outptr = (r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2] + - r2[0] + r2[1] + r2[2]) / 9.0; + *outptr = (r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2] + r2[0] + + r2[1] + r2[2]) / + 9.0; r0 += 2; r1 += 2; diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index cba69533..cd5fff22 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -10,16 +10,16 @@ namespace kernels { template <> void BatchNormFunctor::operator()( - const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, + const float *input, + const float *scale, + const float *offset, + const float *mean, + const float *var, const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, - float* output) { + float *output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . 
// The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + @@ -40,8 +40,8 @@ void BatchNormFunctor::operator()( float32x4_t new_scale_f = vdupq_n_f32(new_scale); float32x4_t new_offset_f = vdupq_n_f32(new_offset); for (index_t i = 0; i < n; ++i) { - const float* input_sample_ptr = input + pos; - float* output_sample_ptr = output + pos; + const float *input_sample_ptr = input + pos; + float *output_sample_ptr = output + pos; for (index_t j = 0; j < count; ++j) { float32x4_t input_f = vld1q_f32(input_sample_ptr); diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index 29bccaca..c135cb8c 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -41,20 +41,17 @@ extern void Conv2dNeonK5x5S1(const float *input, const index_t *output_shape); template <> -void Conv2dFunctor::operator()(const float *input, - const index_t *input_shape, - const float *filter, - const index_t *filter_shape, - const float *bias, - float *output, - const index_t *output_shape) { +void Conv2dFunctor::operator()( + const float *input, + const index_t *input_shape, + const float *filter, + const index_t *filter_shape, + const float *bias, + float *output, + const index_t *output_shape) { typedef void (*Conv2dNeonFunction)( - const float *input, - const index_t *input_shape, - const float *filter, - const index_t *filter_shape, - const float *bias, - float *output, + const float *input, const index_t *input_shape, const float *filter, + const index_t *filter_shape, const float *bias, float *output, const index_t *output_shape); // Selection matrix: kernel_size x stride_size static const Conv2dNeonFunction selector[5][2] = { @@ -81,12 +78,14 @@ void Conv2dFunctor::operator()(const float *input, // Keep this alive during kernel execution Tensor padded_input; if (paddings_[0] > 0 || paddings_[1] > 0) { - ConstructInputWithPadding(input, input_shape, paddings_.data(), &padded_input); + ConstructInputWithPadding(input, input_shape, paddings_.data(), + &padded_input); input = padded_input.data(); input_shape = padded_input.shape().data(); } auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_neon_func(input, input_shape, filter, nullptr, bias, output, output_shape); + conv2d_neon_func(input, input_shape, filter, nullptr, bias, output, + output_shape); } } // namespace kernels diff --git a/mace/kernels/neon/conv_2d_neon_1x1.cc b/mace/kernels/neon/conv_2d_neon_1x1.cc index b4c2b164..119b48e2 100644 --- a/mace/kernels/neon/conv_2d_neon_1x1.cc +++ b/mace/kernels/neon/conv_2d_neon_1x1.cc @@ -10,9 +10,8 @@ namespace mace { namespace kernels { static constexpr index_t kInputChannelBlockSize = 2; static constexpr index_t kOutputChannelBlockSize = 4; -static __attribute__((__aligned__(64))) int32_t mask_array[8] = { - 0, 0, 0, 0, -1, -1, -1, -1 -}; +static __attribute__((__aligned__(64))) +int32_t mask_array[8] = {0, 0, 0, 0, -1, -1, -1, -1}; static inline void NeonConv2x4Kernel(index_t input_channels, index_t pixel_size, @@ -77,15 +76,15 @@ static inline void NeonConv2x4Kernel(index_t input_channels, output3 = output3 + pixel_size - 4; float32x4_t voutput3 = vld1q_f32(output3); - const float32x4_t vinput0 = vreinterpretq_f32_s32( - vandq_s32(vmask, vreinterpretq_s32_f32(vld1q_f32(&input0[pixel_size - 4])))); + const float32x4_t vinput0 = vreinterpretq_f32_s32(vandq_s32( + vmask, vreinterpretq_s32_f32(vld1q_f32(&input0[pixel_size - 4])))); voutput0 = vfmaq_lane_f32(voutput0, vinput0, vfilter0x, 0); 
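// --- Sketch (not part of this patch): the masked tail-load idiom used by
// NeonConv2x4Kernel above. When pixel_size is not a multiple of 4, the kernel
// reloads the final four floats from &input[pixel_size - 4] and ANDs them with
// a mask so lanes that earlier iterations already covered contribute zero to
// the FMA. Assumes AArch64 NEON (<arm_neon.h>); MaskedTailLoad is an
// illustrative name, not an identifier from this patch.
#include <arm_neon.h>
// remainder in [1, 4]: keep only the last `remainder` lanes of the load.
static inline float32x4_t MaskedTailLoad(const float *p, int remainder) {
  // mask_array[remainder .. remainder + 3] ends in exactly `remainder` words
  // of -1, so the AND zeroes every already-processed leading lane.
  static const int32_t mask_array[8] = {0, 0, 0, 0, -1, -1, -1, -1};
  const int32x4_t vmask = vld1q_s32(&mask_array[remainder]);
  return vreinterpretq_f32_s32(
      vandq_s32(vmask, vreinterpretq_s32_f32(vld1q_f32(p))));
}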
voutput1 = vfmaq_lane_f32(voutput1, vinput0, vfilter1x, 0); voutput2 = vfmaq_lane_f32(voutput2, vinput0, vfilter2x, 0); voutput3 = vfmaq_lane_f32(voutput3, vinput0, vfilter3x, 0); - const float32x4_t vinput1 = vreinterpretq_f32_s32( - vandq_s32(vmask, vreinterpretq_s32_f32(vld1q_f32(&input1[pixel_size - 4])))); + const float32x4_t vinput1 = vreinterpretq_f32_s32(vandq_s32( + vmask, vreinterpretq_s32_f32(vld1q_f32(&input1[pixel_size - 4])))); voutput0 = vfmaq_lane_f32(voutput0, vinput1, vfilter0x, 1); voutput1 = vfmaq_lane_f32(voutput1, vinput1, vfilter1x, 1); voutput2 = vfmaq_lane_f32(voutput2, vinput1, vfilter2x, 1); @@ -98,13 +97,14 @@ static inline void NeonConv2x4Kernel(index_t input_channels, } } -static inline void NeonConv2x4SubBlockKernel(index_t input_channels_subblock_size, - index_t output_channels_subblock_size, - index_t input_channels, - index_t pixel_size, - const float *input, - const float *filter, - float *output) { +static inline void NeonConv2x4SubBlockKernel( + index_t input_channels_subblock_size, + index_t output_channels_subblock_size, + index_t input_channels, + index_t pixel_size, + const float *input, + const float *filter, + float *output) { const float *input0 = input; const float *input1 = input + pixel_size; @@ -204,16 +204,16 @@ static inline void NeonConv2x4SubBlockKernel(index_t input_channels_subblock_siz } } - const float32x4_t vinput0 = vreinterpretq_f32_s32( - vandq_s32(vmask, vreinterpretq_s32_f32(vld1q_f32(&input0[pixel_size - 4])))); + const float32x4_t vinput0 = vreinterpretq_f32_s32(vandq_s32( + vmask, vreinterpretq_s32_f32(vld1q_f32(&input0[pixel_size - 4])))); voutput0 = vfmaq_lane_f32(voutput0, vinput0, vfilter0x, 0); voutput1 = vfmaq_lane_f32(voutput1, vinput0, vfilter1x, 0); voutput2 = vfmaq_lane_f32(voutput2, vinput0, vfilter2x, 0); voutput3 = vfmaq_lane_f32(voutput3, vinput0, vfilter3x, 0); if (input_channels_subblock_size > 1) { - const float32x4_t vinput1 = vreinterpretq_f32_s32( - vandq_s32(vmask, vreinterpretq_s32_f32(vld1q_f32(&input1[pixel_size - 4])))); + const float32x4_t vinput1 = vreinterpretq_f32_s32(vandq_s32( + vmask, vreinterpretq_s32_f32(vld1q_f32(&input1[pixel_size - 4])))); voutput0 = vfmaq_lane_f32(voutput0, vinput1, vfilter0x, 1); voutput1 = vfmaq_lane_f32(voutput1, vinput1, vfilter1x, 1); voutput2 = vfmaq_lane_f32(voutput2, vinput1, vfilter2x, 1); @@ -237,8 +237,8 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW const index_t *input_shape, const float *filter, // c_out, c_in, filter_h, filter_w const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW + const float *bias, // c_out + float *output, // NCHW const index_t *output_shape) { const index_t batch = output_shape[0]; const index_t channels = output_shape[1]; @@ -251,7 +251,7 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW const index_t input_width = input_shape[3]; MACE_CHECK(input_batch == batch && input_height == height && - input_width == width); + input_width == width); const index_t total_pixels = height * width; const index_t round_up_channels = RoundUp(channels, kOutputChannelBlockSize); @@ -259,22 +259,27 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW #pragma omp parallel for collapse(2) for (index_t n = 0; n < batch; ++n) { for (int i = 0; i < channels; ++i) { - float *output_ptr_base = output + n * channels * total_pixels + i * total_pixels; - std::fill(output_ptr_base, output_ptr_base + total_pixels, bias ? 
bias[i] : 0); + float *output_ptr_base = + output + n * channels * total_pixels + i * total_pixels; + std::fill(output_ptr_base, output_ptr_base + total_pixels, + bias ? bias[i] : 0); } } - // benchmark omp collapsed(2) +// benchmark omp collapsed(2) #pragma omp parallel for collapse(2) for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < round_up_channels; c += kOutputChannelBlockSize) { const float *input_ptr = input + n * input_channels * total_pixels; const float *filter_ptr = filter + c * input_channels; - float *output_ptr = output + n * channels * total_pixels + c * total_pixels; - const index_t output_channel_block_size = std::min(channels - c, kOutputChannelBlockSize); + float *output_ptr = + output + n * channels * total_pixels + c * total_pixels; + const index_t output_channel_block_size = + std::min(channels - c, kOutputChannelBlockSize); index_t remain_input_channels = input_channels; if (c + kOutputChannelBlockSize <= channels) { while (remain_input_channels >= kInputChannelBlockSize) { - NeonConv2x4Kernel(input_channels, total_pixels, input_ptr, filter_ptr, output_ptr); + NeonConv2x4Kernel(input_channels, total_pixels, input_ptr, filter_ptr, + output_ptr); input_ptr += kInputChannelBlockSize * total_pixels; filter_ptr += kInputChannelBlockSize; @@ -282,25 +287,27 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW } } while (remain_input_channels != 0) { - const index_t input_channel_block_size = std::min(remain_input_channels, kInputChannelBlockSize); - NeonConv2x4SubBlockKernel(input_channel_block_size, output_channel_block_size, - input_channels, total_pixels, input_ptr, filter_ptr, output_ptr); + const index_t input_channel_block_size = + std::min(remain_input_channels, kInputChannelBlockSize); + NeonConv2x4SubBlockKernel( + input_channel_block_size, output_channel_block_size, input_channels, + total_pixels, input_ptr, filter_ptr, output_ptr); input_ptr += kInputChannelBlockSize * total_pixels; filter_ptr += kInputChannelBlockSize; remain_input_channels -= input_channel_block_size; } - } } }; -void Conv2dNeonPixelK1x1S1(const float *input, // NCHW - const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w - const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW - const index_t *output_shape) { +void Conv2dNeonPixelK1x1S1( + const float *input, // NCHW + const index_t *input_shape, + const float *filter, // c_out, c_in, kernel_h, kernel_w + const index_t *filter_shape, + const float *bias, // c_out + float *output, // NCHW + const index_t *output_shape) { const index_t batch = output_shape[0]; const index_t channels = output_shape[1]; const index_t height = output_shape[2]; @@ -312,7 +319,7 @@ void Conv2dNeonPixelK1x1S1(const float *input, // NCHW const index_t input_width = input_shape[3]; MACE_CHECK(input_batch == batch && input_height == height && - input_width == width); + input_width == width); const index_t total_pixels = height * width; // Process 4 * 2 = 8 pixels for each innermost loop @@ -320,7 +327,7 @@ void Conv2dNeonPixelK1x1S1(const float *input, // NCHW const index_t total_loops = total_pixels >> 3; const index_t loop_remaining = total_pixels & 7; - // benchmark omp collapsed(2) +// benchmark omp collapsed(2) #pragma omp parallel for collapse(2) for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < channels; ++c) { @@ -341,8 +348,8 @@ void Conv2dNeonPixelK1x1S1(const float *input, // NCHW float *output_ptr = channel_output_start; // The begining of each input feature map channel 
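// --- Sketch (not part of this patch): what Conv2dNeonK1x1S1 and
// Conv2dNeonPixelK1x1S1 compute for one image. A 1x1 convolution over NCHW
// data is a per-pixel dot product across input channels; the NEON code above
// unrolls this over blocks of pixels and input channels. Conv1x1Reference is
// an illustrative name; layouts match the comments above (input NCHW, filter
// c_out x c_in).
static void Conv1x1Reference(const float *input, const float *filter,
                             const float *bias, float *output,
                             int in_channels, int out_channels,
                             int total_pixels) {
  for (int c = 0; c < out_channels; ++c) {
    for (int p = 0; p < total_pixels; ++p) {
      float sum = bias ? bias[c] : 0.0f;  // bias initializes each output
      for (int ic = 0; ic < in_channels; ++ic) {
        sum += filter[c * in_channels + ic] * input[ic * total_pixels + p];
      }
      output[c * total_pixels + p] = sum;
    }
  }
}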
MACE_ASSERT(input_ptr == - input + n * input_channels * input_height * input_width + - inc * input_height * input_width); + input + n * input_channels * input_height * input_width + + inc * input_height * input_width); const float *input_ptr1 = input_ptr + total_pixels; const float *input_ptr2 = input_ptr1 + total_pixels; @@ -426,8 +433,8 @@ void Conv2dNeonPixelK1x1S1(const float *input, // NCHW for (; inc < input_channels; ++inc) { float *output_ptr = channel_output_start; MACE_ASSERT(input_ptr == - input + n * input_channels * input_height * input_width + - inc * input_height * input_width); + input + n * input_channels * input_height * input_width + + inc * input_height * input_width); MACE_ASSERT(filter_ptr == filter + c * input_channels + inc); const float k0 = filter_ptr[0]; diff --git a/mace/kernels/neon/conv_2d_neon_3x3.cc b/mace/kernels/neon/conv_2d_neon_3x3.cc index 93ff3c91..9a88aa18 100644 --- a/mace/kernels/neon/conv_2d_neon_3x3.cc +++ b/mace/kernels/neon/conv_2d_neon_3x3.cc @@ -11,44 +11,49 @@ namespace kernels { static const int kRegisterSize = 4; static const int kFilterSize = 9; -void Conv2dNeonK3x3S1(const float *input, // NCHW +void Conv2dNeonK3x3S1(const float *input, // NCHW const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w + const float *filter, // c_out, c_in, kernel_h, kernel_w const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW + const float *bias, // c_out + float *output, // NCHW const index_t *output_shape) { - int height_count = (output_shape[2] >> 1) << 1; - int output_batch = output_shape[0]; + int output_batch = output_shape[0]; int output_channels = output_shape[1]; - int output_height = output_shape[2]; - int output_width = output_shape[3]; - int input_batch = input_shape[0]; + int output_height = output_shape[2]; + int output_width = output_shape[3]; + int input_batch = input_shape[0]; int input_channels = input_shape[1]; - int input_height = input_shape[2]; - int input_width = input_shape[3]; - int multiplier = filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); - int filter_in_channels = filter_shape == nullptr ? input_channels : filter_shape[1]; + int input_height = input_shape[2]; + int input_width = input_shape[3]; + int multiplier = + filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); + int filter_in_channels = + filter_shape == nullptr ? input_channels : filter_shape[1]; #pragma omp parallel for collapse(2) for (int b = 0; b < output_batch; ++b) { for (int oc = 0; oc < output_channels; ++oc) { - float *output_ptr_base = output + b * output_channels * output_height * output_width; + float *output_ptr_base = + output + b * output_channels * output_height * output_width; const float *filter_ptr = filter + oc * filter_in_channels * kFilterSize; - const float *input_ptr = input + b * input_channels * input_height * input_width; + const float *input_ptr = + input + b * input_channels * input_height * input_width; if (filter_shape != nullptr) { input_ptr += (oc / multiplier) * input_height * input_width; } float *output_ptr = output_ptr_base + oc * output_height * output_width; - std::fill(output_ptr, output_ptr + output_height * output_width, bias ? bias[oc] : 0); + std::fill(output_ptr, output_ptr + output_height * output_width, + bias ? 
bias[oc] : 0); for (int ic = 0; ic < filter_in_channels; ++ic) { - float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), vld1q_f32(filter_ptr+3), vld1q_f32(filter_ptr+6)}; + float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), + vld1q_f32(filter_ptr + 3), + vld1q_f32(filter_ptr + 6)}; const float *row_ptr_v[kRegisterSize] = { - input_ptr, input_ptr + input_width, - input_ptr + 2 * input_width, input_ptr + 3 * input_width - }; + input_ptr, input_ptr + input_width, input_ptr + 2 * input_width, + input_ptr + 3 * input_width}; float *output_ptr_v[] = {output_ptr, output_ptr + output_width}; @@ -69,8 +74,10 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW float32x4_t n_row1_former = vld1q_f32(row_ptr_v[1]); float32x4_t n_row1_latter = vld1q_f32(row_ptr_v[1] + kRegisterSize); - float32x4_t n_row1_ext0 = vextq_f32(n_row1_former, n_row1_latter, 1); - float32x4_t n_row1_ext1 = vextq_f32(n_row1_former, n_row1_latter, 2); + float32x4_t n_row1_ext0 = + vextq_f32(n_row1_former, n_row1_latter, 1); + float32x4_t n_row1_ext1 = + vextq_f32(n_row1_former, n_row1_latter, 2); n_sum0 = vfmaq_laneq_f32(n_sum0, n_row1_former, n_filter_v[1], 0); n_sum0 = vfmaq_laneq_f32(n_sum0, n_row1_ext0, n_filter_v[1], 1); n_sum0 = vfmaq_laneq_f32(n_sum0, n_row1_ext1, n_filter_v[1], 2); @@ -115,11 +122,9 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW } } for (; remain_count > 0; --remain_count) { - float32x4_t n_row_v[] = { - vld1q_f32(row_ptr_v[0]), - vld1q_f32(row_ptr_v[1]), - vld1q_f32(row_ptr_v[2]) - }; + float32x4_t n_row_v[] = {vld1q_f32(row_ptr_v[0]), + vld1q_f32(row_ptr_v[1]), + vld1q_f32(row_ptr_v[2])}; float32x4_t n_sum0 = vmulq_f32(n_row_v[0], n_filter_v[0]); n_sum0 = vmlaq_f32(n_sum0, n_row_v[1], n_filter_v[1]); n_sum0 = vmlaq_f32(n_sum0, n_row_v[2], n_filter_v[2]); @@ -185,8 +190,7 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW } for (; remain_count > 0; --remain_count) { float32x4_t n_row_v[] = { - vld1q_f32(row_ptr_v[0]), - vld1q_f32(row_ptr_v[1]), + vld1q_f32(row_ptr_v[0]), vld1q_f32(row_ptr_v[1]), vld1q_f32(row_ptr_v[2]), }; @@ -210,43 +214,49 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW } } -void Conv2dNeonK3x3S2(const float *input, // NCHW +void Conv2dNeonK3x3S2(const float *input, // NCHW const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w + const float *filter, // c_out, c_in, kernel_h, kernel_w const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW + const float *bias, // c_out + float *output, // NCHW const index_t *output_shape) { int tail_step = 2 * (input_shape[3] - output_shape[3]); - int output_batch = output_shape[0]; + int output_batch = output_shape[0]; int output_channels = output_shape[1]; - int output_height = output_shape[2]; - int output_width = output_shape[3]; - int input_batch = input_shape[0]; + int output_height = output_shape[2]; + int output_width = output_shape[3]; + int input_batch = input_shape[0]; int input_channels = input_shape[1]; - int input_height = input_shape[2]; - int input_width = input_shape[3]; - int multiplier = filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); - int filter_in_channels = filter_shape == nullptr ? input_channels : filter_shape[1]; + int input_height = input_shape[2]; + int input_width = input_shape[3]; + int multiplier = + filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); + int filter_in_channels = + filter_shape == nullptr ? 
input_channels : filter_shape[1]; #pragma omp parallel for collapse(2) for (int b = 0; b < output_batch; ++b) { for (int oc = 0; oc < output_channels; ++oc) { - float *output_ptr_base = output + b * output_channels * output_height * output_width; + float *output_ptr_base = + output + b * output_channels * output_height * output_width; const float *filter_ptr = filter + oc * filter_in_channels * kFilterSize; - const float *input_ptr = input + b * input_channels * input_height * input_width; + const float *input_ptr = + input + b * input_channels * input_height * input_width; if (filter_shape != nullptr) { input_ptr += (oc / multiplier) * input_height * input_width; } float *output_ptr = output_ptr_base + oc * output_height * output_width; - std::fill(output_ptr, output_ptr + output_height * output_width, bias ? bias[oc] : 0); + std::fill(output_ptr, output_ptr + output_height * output_width, + bias ? bias[oc] : 0); for (int ic = 0; ic < filter_in_channels; ++ic) { - float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), vld1q_f32(filter_ptr+3), vld1q_f32(filter_ptr+6)}; + float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), + vld1q_f32(filter_ptr + 3), + vld1q_f32(filter_ptr + 6)}; - const float *row_ptr_v[3] = { - input_ptr, input_ptr + input_width, input_ptr + 2 * input_width - }; + const float *row_ptr_v[3] = {input_ptr, input_ptr + input_width, + input_ptr + 2 * input_width}; float *output_ptr_inner = output_ptr; @@ -259,24 +269,33 @@ void Conv2dNeonK3x3S2(const float *input, // NCHW float32x4x2_t n_row_former = vld2q_f32(row_ptr_v[0]); float32x4_t n_row_latter = vld1q_f32(row_ptr_v[0] + 8); - float32x4_t n_row_ext = vextq_f32(n_row_former.val[0], n_row_latter, 1); + float32x4_t n_row_ext = + vextq_f32(n_row_former.val[0], n_row_latter, 1); - n_sum = vfmaq_laneq_f32(n_sum, n_row_former.val[0], n_filter_v[0], 0); - n_sum = vfmaq_laneq_f32(n_sum, n_row_former.val[1], n_filter_v[0], 1); + n_sum = + vfmaq_laneq_f32(n_sum, n_row_former.val[0], n_filter_v[0], 0); + n_sum = + vfmaq_laneq_f32(n_sum, n_row_former.val[1], n_filter_v[0], 1); n_sum = vfmaq_laneq_f32(n_sum, n_row_ext, n_filter_v[0], 2); float32x4x2_t n_row1_former = vld2q_f32(row_ptr_v[1]); float32x4_t n_row1_latter = vld1q_f32(row_ptr_v[1] + 8); - float32x4_t n_row1_ext = vextq_f32(n_row1_former.val[0], n_row1_latter, 1); - n_sum = vfmaq_laneq_f32(n_sum, n_row1_former.val[0], n_filter_v[1], 0); - n_sum = vfmaq_laneq_f32(n_sum, n_row1_former.val[1], n_filter_v[1], 1); + float32x4_t n_row1_ext = + vextq_f32(n_row1_former.val[0], n_row1_latter, 1); + n_sum = + vfmaq_laneq_f32(n_sum, n_row1_former.val[0], n_filter_v[1], 0); + n_sum = + vfmaq_laneq_f32(n_sum, n_row1_former.val[1], n_filter_v[1], 1); n_sum = vfmaq_laneq_f32(n_sum, n_row1_ext, n_filter_v[1], 2); float32x4x2_t n_row2_former = vld2q_f32(row_ptr_v[2]); float32x4_t n_row2_latter = vld1q_f32(row_ptr_v[2] + 8); - float32x4_t n_row2_ext = vextq_f32(n_row2_former.val[0], n_row2_latter, 1); - n_sum = vfmaq_laneq_f32(n_sum, n_row2_former.val[0], n_filter_v[2], 0); - n_sum = vfmaq_laneq_f32(n_sum, n_row2_former.val[1], n_filter_v[2], 1); + float32x4_t n_row2_ext = + vextq_f32(n_row2_former.val[0], n_row2_latter, 1); + n_sum = + vfmaq_laneq_f32(n_sum, n_row2_former.val[0], n_filter_v[2], 0); + n_sum = + vfmaq_laneq_f32(n_sum, n_row2_former.val[1], n_filter_v[2], 1); n_sum = vfmaq_laneq_f32(n_sum, n_row2_ext, n_filter_v[2], 2); float32x4_t n_output_row = vld1q_f32(output_ptr_inner); @@ -288,11 +307,9 @@ void Conv2dNeonK3x3S2(const float *input, // NCHW } } for (; remain_count > 0; 
--remain_count) { - float32x4_t n_row_v[] = { - vld1q_f32(row_ptr_v[0]), - vld1q_f32(row_ptr_v[1]), - vld1q_f32(row_ptr_v[2]) - }; + float32x4_t n_row_v[] = {vld1q_f32(row_ptr_v[0]), + vld1q_f32(row_ptr_v[1]), + vld1q_f32(row_ptr_v[2])}; float32x4_t n_sum = vmulq_f32(n_row_v[0], n_filter_v[0]); n_sum = vmlaq_f32(n_sum, n_row_v[1], n_filter_v[1]); n_sum = vmlaq_f32(n_sum, n_row_v[2], n_filter_v[2]); @@ -315,5 +332,5 @@ void Conv2dNeonK3x3S2(const float *input, // NCHW } } } -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace diff --git a/mace/kernels/neon/conv_2d_neon_5x5.cc b/mace/kernels/neon/conv_2d_neon_5x5.cc index 88120f13..26cdc0b0 100644 --- a/mace/kernels/neon/conv_2d_neon_5x5.cc +++ b/mace/kernels/neon/conv_2d_neon_5x5.cc @@ -14,8 +14,8 @@ void Conv2dNeonK5x5S1(const float *input, // NCHW const index_t *input_shape, const float *filter, // c_out, c_in, kernel_h, kernel_w const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW + const float *bias, // c_out + float *output, // NCHW const index_t *output_shape) { const index_t batch = output_shape[0]; const index_t channels = output_shape[1]; @@ -41,7 +41,7 @@ void Conv2dNeonK5x5S1(const float *input, // NCHW for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < channels; ++c) { float *output_ptr = output + n * output_total_pixels_per_batch + - c * output_total_pixels_per_channel; + c * output_total_pixels_per_channel; const float *input_ptr = input + n * input_total_pixels_per_batch; // Fill with bias diff --git a/mace/kernels/neon/depthwise_conv_neon.cc b/mace/kernels/neon/depthwise_conv_neon.cc index eda2325d..75f01707 100644 --- a/mace/kernels/neon/depthwise_conv_neon.cc +++ b/mace/kernels/neon/depthwise_conv_neon.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
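// --- Sketch (not part of this patch): the dispatch pattern this file and
// conv_2d_neon.cc share. A function-pointer table indexed by kernel size and
// stride (the "Selection matrix: kernel_size x stride_size" above) picks a
// specialized NEON kernel; shapes with no fast path fall back to the generic
// functor. Table contents here are illustrative, using the extern kernels
// this patch declares, and demo_selector is an invented name.
typedef void (*Conv2dKernelFn)(const float *input, const index_t *input_shape,
                               const float *filter,
                               const index_t *filter_shape, const float *bias,
                               float *output, const index_t *output_shape);
// demo_selector[kernel_h - 1][stride_h - 1]; nullptr means "no fast path".
static const Conv2dKernelFn demo_selector[5][2] = {
    {Conv2dNeonK1x1S1, nullptr},            // 1x1
    {nullptr, nullptr},                     // 2x2
    {Conv2dNeonK3x3S1, Conv2dNeonK3x3S2},   // 3x3
    {nullptr, nullptr},                     // 4x4
    {Conv2dNeonK5x5S1, nullptr},            // 5x5
};
// Usage inside operator(), roughly:
//   auto fn = demo_selector[kernel_h - 1][strides_[0] - 1];
//   if (fn == nullptr) { /* run the generic CPU functor instead */ }
//   else fn(input, input_shape, filter, filter_shape, bias, output,
//           output_shape);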
// -#include "mace/kernels/depthwise_conv2d.h" #include "mace/kernels/conv_2d.h" +#include "mace/kernels/depthwise_conv2d.h" namespace mace { namespace kernels { @@ -24,21 +24,18 @@ extern void Conv2dNeonK3x3S2(const float *input, float *output, const index_t *output_shape); -template<> -void DepthwiseConv2dFunctor::operator()(const float *input, // NCHW - const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w - const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW - const index_t *output_shape) { +template <> +void DepthwiseConv2dFunctor::operator()( + const float *input, // NCHW + const index_t *input_shape, + const float *filter, // c_out, c_in, kernel_h, kernel_w + const index_t *filter_shape, + const float *bias, // c_out + float *output, // NCHW + const index_t *output_shape) { typedef void (*Conv2dNeonFunction)( - const float *input, - const index_t *input_shape, - const float *filter, - const index_t *filter_shape, - const float *bias, - float *output, + const float *input, const index_t *input_shape, const float *filter, + const index_t *filter_shape, const float *bias, float *output, const index_t *output_shape); // Selection matrix: kernel_size x stride_size static const Conv2dNeonFunction selector[5][2] = { @@ -57,7 +54,8 @@ void DepthwiseConv2dFunctor::operator()(const float *in << "filter" << kernel_h << "x" << kernel_w << "," << " stride " << strides_[0] << "x" << strides_[1] << " is not implemented yet, using slow version"; - DepthwiseConv2dFunctor(strides_, paddings_, dilations_)( + DepthwiseConv2dFunctor(strides_, paddings_, + dilations_)( input, input_shape, filter, filter_shape, bias, output, output_shape); return; } @@ -65,13 +63,15 @@ void DepthwiseConv2dFunctor::operator()(const float *in // Keep this alive during kernel execution Tensor padded_input; if (paddings_[0] > 0 || paddings_[1] > 0) { - ConstructInputWithPadding(input, input_shape, paddings_.data(), &padded_input); + ConstructInputWithPadding(input, input_shape, paddings_.data(), + &padded_input); input = padded_input.data(); input_shape = padded_input.shape().data(); } auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_neon_func(input, input_shape, filter, filter_shape, bias, output, output_shape); + conv2d_neon_func(input, input_shape, filter, filter_shape, bias, output, + output_shape); } -} // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/kernels/neon/global_avg_pooling_neon.cc b/mace/kernels/neon/global_avg_pooling_neon.cc index 2980afec..88c54fdc 100644 --- a/mace/kernels/neon/global_avg_pooling_neon.cc +++ b/mace/kernels/neon/global_avg_pooling_neon.cc @@ -8,11 +8,9 @@ namespace mace { namespace kernels { -template<> +template <> void GlobalAvgPoolingFunctor::operator()( - const float *input, - const index_t *input_shape, - float *output) { + const float *input, const index_t *input_shape, float *output) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; diff --git a/mace/kernels/neon/pooling_neon.cc b/mace/kernels/neon/pooling_neon.cc index 06efdeaa..6960c7ee 100644 --- a/mace/kernels/neon/pooling_neon.cc +++ b/mace/kernels/neon/pooling_neon.cc @@ -55,7 +55,7 @@ extern void PoolingAvgNeonK3x3S2x2Padded(const float *input, const index_t *out_shape); #endif -template<> +template <> void PoolingFunctor::operator()( const float *input, const index_t 
*input_shape, @@ -71,14 +71,14 @@ void PoolingFunctor::operator()( if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 && strides_[1] == 2) { // kernel_size: 2x2, strides: 2x2 - if (pooling_type_ == MAX) { // MAX_POOL_2x2s2x2 + if (pooling_type_ == MAX) { // MAX_POOL_2x2s2x2 #ifdef __COPY_MAKE_PADDING PoolingMaxNeonK2x2S2x2Padded(input, input_shape, output, output_shape); #else PoolingMaxNeonK2x2S2x2(input, input_shape, output, output_shape, paddings_); #endif - } else { // AVG_POOL_2x2s2x2 + } else { // AVG_POOL_2x2s2x2 #ifdef __COPY_MAKE_PADDING PoolingAvgNeonK2x2S2x2Padded(input, input_shape, output, output_shape); #else @@ -87,16 +87,16 @@ void PoolingFunctor::operator()( #endif } } else if (kernels_[0] == 3 && kernels_[1] == 3 && strides_[0] == 2 && - strides_[1] == 2) { + strides_[1] == 2) { // kernel_size: 3x3, strides: 2x2 - if (pooling_type_ == MAX) { // MAX_POOL_3x3s2x2 + if (pooling_type_ == MAX) { // MAX_POOL_3x3s2x2 #ifdef __COPY_MAKE_PADDING PoolingMaxNeonK3x3S2x2Padded(input, input_shape, output, output_shape); #else PoolingMaxNeonK3x3S2x2(input, input_shape, output, output_shape, paddings_); #endif - } else { // AVG_POOL_3x3s2x2 + } else { // AVG_POOL_3x3s2x2 #ifdef __COPY_MAKE_PADDING PoolingAvgNeonK3x3S2x2Padded(input, input_shape, output, output_shape); #else diff --git a/mace/ops/addn.h b/mace/ops/addn.h index 064be034..b6265963 100644 --- a/mace/ops/addn.h +++ b/mace/ops/addn.h @@ -13,18 +13,18 @@ namespace mace { template class AddNOp : public Operator { public: - AddNOp(const OperatorDef& operator_def, Workspace* ws) + AddNOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws) {} bool Run() override { - Tensor* output_tensor = this->outputs_[0]; + Tensor *output_tensor = this->outputs_[0]; output_tensor->ResizeLike(this->inputs_[0]); - T* output = output_tensor->mutable_data(); + T *output = output_tensor->mutable_data(); index_t size = this->inputs_[0]->size(); int n = this->inputs_.size(); - vector inputs(n); + vector inputs(n); for (int i = 0; i < n; ++i) { - const Tensor* input_tensor = this->inputs_[i]; + const Tensor *input_tensor = this->inputs_[i]; inputs[i] = input_tensor->data(); } diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index f7329d1b..4893c850 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -39,7 +39,7 @@ static void AddNBenchmark(int iters, int n, int size) { static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \ const int64_t tot = static_cast(iters) * N * SIZE; \ mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ AddNBenchmark(iters, N, SIZE); \ } \ BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE) diff --git a/mace/ops/addn_test.cc b/mace/ops/addn_test.cc index dd5f906f..8e6497f2 100644 --- a/mace/ops/addn_test.cc +++ b/mace/ops/addn_test.cc @@ -11,7 +11,7 @@ class AddnOpTest : public OpsTestBase {}; TEST_F(AddnOpTest, AddnOp) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("AddN", "AddNTest") .Input("Input1") .Input("Input2") diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index e92d9ebb..a7292601 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -13,17 +13,16 @@ namespace mace { template class BatchNormOp : public Operator { public: - BatchNormOp(const OperatorDef& operator_def, Workspace* ws) - : Operator(operator_def, ws), - functor_() {} + BatchNormOp(const OperatorDef 
&operator_def, Workspace *ws) + : Operator(operator_def, ws), functor_() {} bool Run() override { - const Tensor* input = this->Input(0); - const Tensor* scale = this->Input(1); - const Tensor* offset = this->Input(2); - const Tensor* mean = this->Input(3); - const Tensor* var = this->Input(4); - const Tensor* epsilon = this->Input(5); + const Tensor *input = this->Input(0); + const Tensor *scale = this->Input(1); + const Tensor *offset = this->Input(2); + const Tensor *mean = this->Input(3); + const Tensor *var = this->Input(4); + const Tensor *epsilon = this->Input(5); MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); @@ -38,23 +37,23 @@ class BatchNormOp : public Operator { MACE_CHECK(epsilon->dim_size() == 0, "epsilon must be 0-dimensional. ", epsilon->dim_size()); - Tensor* output = this->Output(0); + Tensor *output = this->Output(0); output->ResizeLike(input); const index_t n = input->dim(0); const index_t channel = input->dim(1); const index_t sample_size = input->dim(2) * input->dim(3); - const T* input_ptr = input->data(); - const T* scale_ptr = scale->data(); - const T* offset_ptr = offset->data(); - const T* mean_ptr = mean->data(); - const T* var_ptr = var->data(); - const T* epsilon_ptr = epsilon->data(); - T* output_ptr = output->mutable_data(); + const T *input_ptr = input->data(); + const T *scale_ptr = scale->data(); + const T *offset_ptr = offset->data(); + const T *mean_ptr = mean->data(); + const T *var_ptr = var->data(); + const T *epsilon_ptr = epsilon->data(); + T *output_ptr = output->mutable_data(); - functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, *epsilon_ptr, n, channel, - sample_size, output_ptr); + functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, *epsilon_ptr, + n, channel, sample_size, output_ptr); return true; } diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 16763322..6607695a 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -47,7 +47,7 @@ static void BatchNorm( int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BatchNorm(iters, N, C, H, W); \ } \ BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index fd503ed5..2e931782 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -11,7 +11,7 @@ class BatchNormOpTest : public OpsTestBase {}; TEST_F(BatchNormOpTest, SimpleCPU) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -51,7 +51,7 @@ TEST_F(BatchNormOpTest, SimpleNeon) { index_t height = 103; index_t width = 113; // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -74,7 +74,7 @@ TEST_F(BatchNormOpTest, SimpleNeon) { net.RunOp(); // Check - Tensor* expected = net.GetOutput("Output"); + Tensor *expected = net.GetOutput("Output"); // Run NEON net.RunOp(DeviceType::NEON); diff --git a/mace/ops/channel_shuffle.h b/mace/ops/channel_shuffle.h index 3393efdb..53cd8aee 100644 --- a/mace/ops/channel_shuffle.h +++ b/mace/ops/channel_shuffle.h @@ -12,10 +12,10 @@ namespace mace { -template +template class ChannelShuffleOp : public Operator { public: - 
ChannelShuffleOp(const OperatorDef& operator_def, Workspace* ws) + ChannelShuffleOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), group_(OperatorBase::GetSingleArgument("group", 1)), functor_(this->group_) {} diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index 13d426f8..ecbc3610 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -11,12 +11,8 @@ using namespace mace; using namespace mace::kernels; template -static void ChannelShuffle(int iters, - int batch, - int channels, - int height, - int width, - int group) { +static void ChannelShuffle( + int iters, int batch, int channels, int height, int width, int group) { mace::testing::StopTiming(); OpsTestNet net; @@ -40,18 +36,17 @@ static void ChannelShuffle(int iters, } } -#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, DEVICE) \ - static void \ - BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(float))); \ - ChannelShuffle(iters, N, C, H, W, G); \ - } \ +#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, DEVICE) \ + static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(float))); \ + ChannelShuffle(iters, N, C, H, W, G); \ + } \ BENCHMARK(BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE) -#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ +#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, CPU); BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8); diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index 9722ab2d..dcf0a21e 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -10,7 +10,7 @@ class ChannelShuffleOpTest : public OpsTestBase {}; TEST_F(ChannelShuffleOpTest, C8G4) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") .Input("Input") .Output("Output") @@ -20,18 +20,15 @@ TEST_F(ChannelShuffleOpTest, C8G4) { // Add input data net.AddInputFromArray( - "Input", {1, 8, 1, 2}, - {0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15}); + "Input", {1, 8, 1, 2}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); // Run net.RunOp(); // Check - auto expected = - CreateTensor({1, 8, 1, 2}, - {0, 1, 4, 5, 8, 9, 12, 13, - 2, 3, 6, 7, 10, 11, 14, 15}); + auto expected = CreateTensor( + {1, 8, 1, 2}, {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } diff --git a/mace/ops/concat.h b/mace/ops/concat.h index 28a97ac8..2b82c0cb 100644 --- a/mace/ops/concat.h +++ b/mace/ops/concat.h @@ -5,12 +5,12 @@ #ifndef MACE_OPS_CONCAT_H_ #define MACE_OPS_CONCAT_H_ -#include "mace/proto/mace.pb.h" #include "mace/core/operator.h" #include "mace/kernels/concat.h" +#include "mace/proto/mace.pb.h" namespace mace { -template +template class ConcatOp : public Operator { public: ConcatOp(const OperatorDef &op_def, Workspace *ws) @@ -25,9 +25,11 @@ class ConcatOp : public Operator { axis_tensor->dim_size()); const int32_t concat_axis = *(axis_tensor->data()); const int32_t input_dims = input0->dim_size(); - const int32_t axis = concat_axis < 0 ? 
concat_axis + input_dims : concat_axis; - MACE_CHECK((0 <= axis && axis < input_dims), "Expected concatenating axis in the range [", - -input_dims, ", ", input_dims, "], but got", concat_axis); + const int32_t axis = + concat_axis < 0 ? concat_axis + input_dims : concat_axis; + MACE_CHECK((0 <= axis && axis < input_dims), + "Expected concatenating axis in the range [", -input_dims, ", ", + input_dims, "], but got", concat_axis); std::vector output_shape(input0->shape()); index_t inner_size = 1; for (int i = 0; i < axis; ++i) { @@ -40,10 +42,14 @@ class ConcatOp : public Operator { const Tensor *input = nullptr; for (int i = 1; i < values_count; ++i) { input = this->Input(i); - MACE_CHECK(input->dim_size() == input0->dim_size(), "Ranks of all input tensors must be same."); + MACE_CHECK(input->dim_size() == input0->dim_size(), + "Ranks of all input tensors must be same."); for (int j = 0; j < axis_tensor->dim_size(); ++j) { - if (j == axis) { continue; } - MACE_CHECK(input->dim(j) == input0->dim(j), "Dimensions of inputs should equal except axis."); + if (j == axis) { + continue; + } + MACE_CHECK(input->dim(j) == input0->dim(j), + "Dimensions of inputs should equal except axis."); } input_list[i] = input->data(); outer_sizes[i] = input->size() / inner_size; @@ -53,9 +59,11 @@ class ConcatOp : public Operator { Tensor *output = this->Output(OUTPUT); output->Resize(output_shape); - functor_(input_list, inner_size, outer_sizes.data(), output->mutable_data()); + functor_(input_list, inner_size, outer_sizes.data(), + output->mutable_data()); return true; } + private: kernels::ConcatFunctor functor_; @@ -63,6 +71,6 @@ class ConcatOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_CONCAT_H_ +#endif // MACE_OPS_CONCAT_H_ diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 58d9c8f3..bd56c495 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -7,9 +7,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { -template -static void ConcatHelper( - int iters, int concat_dim, int dim1) { +template +static void ConcatHelper(int iters, int concat_dim, int dim1) { mace::testing::StopTiming(); OpsTestNet net; diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 216d3413..f537e385 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -3,8 +3,8 @@ // #include "mace/ops/concat.h" -#include "mace/ops/ops_test_util.h" #include "gmock/gmock.h" +#include "mace/ops/ops_test_util.h" using namespace mace; @@ -99,9 +99,7 @@ TEST_F(ConcatOpTest, Random) { for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(("Input" + ToString(i)).c_str()); } - builder.Input("Axis") - .Output("Output") - .Finalize(net.operator_def()); + builder.Input("Axis").Output("Output").Finalize(net.operator_def()); std::vector shape_data; GenerateRandomIntTypeData({dim}, shape_data, 1, dim); @@ -114,7 +112,8 @@ TEST_F(ConcatOpTest, Random) { concat_axis_size += input_shapes[i][axis]; GenerateRandomRealTypeData(input_shapes[i], inputs[i]); input_ptrs[i] = inputs[i].data(); - net.AddInputFromArray(("Input" + ToString(i)).c_str(), input_shapes[i], inputs[i]); + net.AddInputFromArray(("Input" + ToString(i)).c_str(), + input_shapes[i], inputs[i]); } net.AddInputFromArray("Axis", {}, {axis}); @@ -131,9 +130,9 @@ TEST_F(ConcatOpTest, Random) { const float *output_ptr = output->data(); while (output_ptr != (output->data() + output->size())) { for (int i = 0; i < num_inputs; ++i) { - index_t 
num_elements = std::accumulate(input_shapes[i].begin() + axis, - input_shapes[i].end(), 1, - std::multiplies()); + index_t num_elements = + std::accumulate(input_shapes[i].begin() + axis, input_shapes[i].end(), + 1, std::multiplies()); for (int j = 0; j < num_elements; ++j) { EXPECT_EQ(*input_ptrs[i]++, *output_ptr++); } diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h index d8603ef0..a223514a 100644 --- a/mace/ops/conv_2d.h +++ b/mace/ops/conv_2d.h @@ -13,7 +13,7 @@ namespace mace { -template +template class Conv2dOp : public ConvPool2dOpBase { public: Conv2dOp(const OperatorDef &op_def, Workspace *ws) @@ -35,11 +35,10 @@ class Conv2dOp : public ConvPool2dOpBase { std::vector output_shape(4); std::vector paddings(2); - kernels::CalcPaddingAndOutputSize(input->shape().data(), - filter->shape().data(), - this->dilations_.data(), - this->strides_.data(), this->padding_, - output_shape.data(), paddings.data()); + kernels::CalcPaddingAndOutputSize( + input->shape().data(), filter->shape().data(), this->dilations_.data(), + this->strides_.data(), this->padding_, output_shape.data(), + paddings.data()); output->Resize(output_shape); functor_.paddings_ = paddings; diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 844fe32e..d682f709 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -54,16 +54,17 @@ static void Conv2d(int iters, } } -#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ - static void \ +#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ + static void \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ - Conv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \ - } \ - BENCHMARK( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Conv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \ + OC); \ + } \ + BENCHMARK( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) #define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 8aaf0d00..2202caf2 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -12,7 +12,7 @@ class Conv2dOpTest : public OpsTestBase {}; TEST_F(Conv2dOpTest, Simple_VALID) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") .Input("Filter") @@ -46,7 +46,7 @@ TEST_F(Conv2dOpTest, Simple_VALID) { TEST_F(Conv2dOpTest, Simple_SAME) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") .Input("Filter") @@ -82,7 +82,7 @@ TEST_F(Conv2dOpTest, Simple_SAME) { TEST_F(Conv2dOpTest, Combined) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") .Input("Filter") @@ -120,7 +120,7 @@ TEST_F(Conv2dOpTest, Combined) { TEST_F(Conv2dOpTest, Conv1x1) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") .Input("Filter") @@ -172,13 +172,13 @@ TEST_F(Conv2dOpTest, IdleConvNxNS12) { srand(time(NULL)); // generate 
random input - index_t batch = 3 ; + index_t batch = 3; index_t input_channels = 64; index_t height = 32; index_t width = 32; index_t output_channels = 128; // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") .Input("Filter") @@ -229,7 +229,7 @@ TEST_F(Conv2dOpTest, DisgustConvNxNS12) { index_t width = 113; index_t output_channels = 3 + rand() % 10; // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") .Input("Filter") diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index c9ba9c25..c1c8d3d7 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -13,13 +13,14 @@ namespace mace { template class ConvPool2dOpBase : public Operator { public: - ConvPool2dOpBase(const OperatorDef& op_def, Workspace* ws) + ConvPool2dOpBase(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), strides_(OperatorBase::GetRepeatedArgument("strides")), padding_(static_cast(OperatorBase::GetSingleArgument( "padding", static_cast(SAME)))), - dilations_(OperatorBase::GetRepeatedArgument("dilations", {1, 1})) {} - + dilations_( + OperatorBase::GetRepeatedArgument("dilations", {1, 1})) {} + protected: std::vector strides_; Padding padding_; diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 320842e1..6d66a688 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -6,10 +6,12 @@ namespace mace { -REGISTER_CPU_OPERATOR(DepthwiseConv2d, DepthwiseConv2dOp); +REGISTER_CPU_OPERATOR(DepthwiseConv2d, + DepthwiseConv2dOp); #if __ARM_NEON -REGISTER_NEON_OPERATOR(DepthwiseConv2d, DepthwiseConv2dOp); +REGISTER_NEON_OPERATOR(DepthwiseConv2d, + DepthwiseConv2dOp); #endif // __ARM_NEON } // namespace mace diff --git a/mace/ops/depthwise_conv2d.h b/mace/ops/depthwise_conv2d.h index b977115a..58c126fc 100644 --- a/mace/ops/depthwise_conv2d.h +++ b/mace/ops/depthwise_conv2d.h @@ -9,12 +9,12 @@ #include "mace/core/operator.h" #include "mace/kernels/conv_2d.h" -#include "mace/ops/conv_pool_2d_base.h" #include "mace/kernels/depthwise_conv2d.h" +#include "mace/ops/conv_pool_2d_base.h" namespace mace { -template +template class DepthwiseConv2dOp : public ConvPool2dOpBase { public: DepthwiseConv2dOp(const OperatorDef &op_def, Workspace *ws) @@ -34,16 +34,16 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase { Tensor *output = this->Output(OUTPUT); // resize filter shape. 
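// --- Sketch (not part of this patch): the filter-shape rewrite performed
// just below. Depthwise filters arrive as {multiplier, in_channels, kh, kw}
// and are viewed as {multiplier * in_channels, 1, kh, kw}, so each output
// channel convolves exactly one input channel (channel c / multiplier in the
// reference kernel earlier in this patch). DepthwiseFilterShape is an
// illustrative name; index_t is the patch's int64_t index type.
#include <cstdint>
#include <vector>
typedef int64_t index_t;
static std::vector<index_t> DepthwiseFilterShape(
    const std::vector<index_t> &filter_shape) {  // {M, C_in, KH, KW}
  std::vector<index_t> shape(filter_shape);
  shape[0] *= shape[1];  // c_out = multiplier * in_channels
  shape[1] = 1;          // one input channel per output channel
  return shape;
}
// e.g. {3, 64, 3, 3} -> {192, 1, 3, 3}: 192 output maps, each reading one of
// the 64 input channels, with multiplier = 3 outputs per input channel.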
- std::vector filter_shape(filter->shape().begin(), filter->shape().end()); + std::vector filter_shape(filter->shape().begin(), + filter->shape().end()); filter_shape[0] *= filter_shape[1]; filter_shape[1] = 1; std::vector output_shape(4); std::vector paddings(2); - kernels::CalcPaddingAndOutputSize(input->shape().data(), - filter_shape.data(), - this->dilations_.data(), - this->strides_.data(), this->padding_, - output_shape.data(), paddings.data()); + kernels::CalcPaddingAndOutputSize( + input->shape().data(), filter_shape.data(), this->dilations_.data(), + this->strides_.data(), this->padding_, output_shape.data(), + paddings.data()); output->Resize(output_shape); functor_.paddings_ = paddings; @@ -62,6 +62,6 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_DEPTHWISE_CONV_H_ +#endif // MACE_OPS_DEPTHWISE_CONV_H_ diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index 24002b15..6868e8c3 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -12,7 +12,7 @@ class DepthwiseConv2dOpTest : public OpsTestBase {}; TEST_F(DepthwiseConv2dOpTest, Simple_VALID) { testing::internal::LogToStderr(); // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") .Input("Input") .Input("Filter") @@ -26,23 +26,20 @@ TEST_F(DepthwiseConv2dOpTest, Simple_VALID) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray( - "Input", {1, 2, 2, 3}, - {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); + net.AddInputFromArray("Input", {1, 2, 2, 3}, + {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); net.AddInputFromArray( "Filter", {2, 2, 2, 2}, - {1.0f, 5.0f, 9.0f, 13.0f, - 2.0f, 6.0f, 10.0f, 14.0f, - 3.0f, 7.0f, 11.0f, 15.0f, - 4.0f, 8.0f, 12.0f, 16.0f}); + {1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, 3.0f, 7.0f, 11.0f, + 15.0f, 4.0f, 8.0f, 12.0f, 16.0f}); net.AddInputFromArray("Bias", {4}, {.1f, .2f, .3f, .4f}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 4, 1, 2}, - {196.1f, 252.1f, 216.2f, 280.2f, - 272.3f, 344.3f, 296.4f, 376.4f}); + auto expected = CreateTensor( + {1, 4, 1, 2}, + {196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -60,7 +57,7 @@ TEST_F(DepthwiseConv2dOpTest, ConvNxNS12) { index_t width = 113; index_t multiplier = 3 + rand() % 10; // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") .Input("Input") .Input("Filter") @@ -75,8 +72,8 @@ TEST_F(DepthwiseConv2dOpTest, ConvNxNS12) { // Add input data net.AddRandomInput("Input", {batch, input_channels, height, width}); - net.AddRandomInput( - "Filter", {multiplier, input_channels, kernel_h, kernel_w}); + net.AddRandomInput("Filter", + {multiplier, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {multiplier * input_channels}); // run cpu net.RunOp(); diff --git a/mace/ops/depthwise_conv_2d_benchmark.cc b/mace/ops/depthwise_conv_2d_benchmark.cc index f535ea17..9ba7001d 100644 --- a/mace/ops/depthwise_conv_2d_benchmark.cc +++ b/mace/ops/depthwise_conv_2d_benchmark.cc @@ -13,15 +13,15 @@ namespace mace { template static void DepthwiseConv2d(int iters, - int batch, - int channels, - int height, - int width, - int kernel_h, - int kernel_w, - int stride, - Padding padding, - int output_channels) { + int batch, + int channels, + int 
height, + int width, + int kernel_h, + int kernel_w, + int stride, + Padding padding, + int output_channels) { mace::testing::StopTiming(); OpsTestNet net; @@ -54,16 +54,18 @@ static void DepthwiseConv2d(int iters, } } -#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ - static void \ +#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ + DEVICE) \ + static void \ BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ - DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \ - } \ - BENCHMARK( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ + mace::Padding::P, OC); \ + } \ + BENCHMARK( \ BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) #define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ diff --git a/mace/ops/global_avg_pooling.h b/mace/ops/global_avg_pooling.h index 151e791e..117857c1 100644 --- a/mace/ops/global_avg_pooling.h +++ b/mace/ops/global_avg_pooling.h @@ -10,7 +10,7 @@ namespace mace { -template +template class GlobalAvgPoolingOp : public Operator { public: GlobalAvgPoolingOp(const OperatorDef &operator_def, Workspace *ws) diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc index 7097a2ae..d2521e7c 100644 --- a/mace/ops/global_avg_pooling_benchmark.cc +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -11,11 +11,8 @@ using namespace mace; using namespace mace::kernels; template -static void GlobalAvgPooling(int iters, - int batch, - int channels, - int height, - int width) { +static void GlobalAvgPooling( + int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); OpsTestNet net; @@ -38,15 +35,14 @@ static void GlobalAvgPooling(int iters, } } -#define BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, DEVICE) \ - static void \ - BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot*(sizeof(float))); \ - GlobalAvgPooling(iters, N, C, H, W); \ - } \ +#define BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, DEVICE) \ + static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(float))); \ + GlobalAvgPooling(iters, N, C, H, W); \ + } \ BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE) #define BM_GLOBAL_AVG_POOLING(N, C, H, W) \ diff --git a/mace/ops/global_avg_pooling_test.cc b/mace/ops/global_avg_pooling_test.cc index d5d99330..bf9e4269 100644 --- a/mace/ops/global_avg_pooling_test.cc +++ b/mace/ops/global_avg_pooling_test.cc @@ -10,7 +10,7 @@ class GlobalAvgPoolingOpTest : public OpsTestBase {}; TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") .Input("Input") .Output("Output") @@ -19,24 +19,22 @@ 
TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { // Add input data std::vector input(147); for (int i = 0; i < 147; ++i) { - input[i] = i/49 + 1; + input[i] = i / 49 + 1; } - net.AddInputFromArray( - "Input", {1, 3, 7, 7}, input); + net.AddInputFromArray("Input", {1, 3, 7, 7}, input); // Run net.RunOp(); // Check - auto expected = - CreateTensor({1, 3, 1, 1}, {1, 2, 3}); + auto expected = CreateTensor({1, 3, 1, 1}, {1, 2, 3}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { // Construct graph - auto& net = test_net(); + auto &net = test_net(); OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") .Input("Input") .Output("Output") @@ -45,17 +43,15 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { // Add input data std::vector input(147); for (int i = 0; i < 147; ++i) { - input[i] = i/49 + 1; + input[i] = i / 49 + 1; } - net.AddInputFromArray( - "Input", {1, 3, 7, 7}, input); + net.AddInputFromArray("Input", {1, 3, 7, 7}, input); // Run net.RunOp(DeviceType::NEON); // Check - auto expected = - CreateTensor({1, 3, 1, 1}, {1, 2, 3}); + auto expected = CreateTensor({1, 3, 1, 1}, {1, 2, 3}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index b8723082..480e7de0 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -43,7 +43,7 @@ class OpsTestNet { public: OpsTestNet() {} - template + template void AddInputFromArray(const char *name, const std::vector &shape, const std::vector &data) { @@ -55,7 +55,7 @@ class OpsTestNet { memcpy(input_data, data.data(), data.size() * sizeof(T)); } - template + template void AddRepeatedInput(const char *name, const std::vector &shape, const T data) { @@ -66,7 +66,7 @@ class OpsTestNet { std::fill(input_data, input_data + input->size(), data); } - template + template void AddRandomInput(const char *name, const std::vector &shape, bool positive = false) { @@ -173,38 +173,37 @@ class OpsTestBase : public ::testing::Test { OpsTestNet test_net_; }; -template -void GenerateRandomRealTypeData(const std::vector &shape, std::vector &res) { +template +void GenerateRandomRealTypeData(const std::vector &shape, + std::vector &res) { std::random_device rd; std::mt19937 gen(rd()); std::normal_distribution nd(0, 1); - index_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + index_t size = std::accumulate(shape.begin(), shape.end(), 1, + std::multiplies()); res.resize(size); - std::generate(res.begin(), res.end(), - [&gen, &nd] { - return nd(gen); - }); + std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); } -template -void GenerateRandomIntTypeData(const std::vector &shape, std::vector &res, - const T a = 0, const T b = std::numeric_limits::max()) { +template +void GenerateRandomIntTypeData(const std::vector &shape, + std::vector &res, + const T a = 0, + const T b = std::numeric_limits::max()) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> nd(a, b); - index_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + index_t size = std::accumulate(shape.begin(), shape.end(), 1, + std::multiplies()); res.resize(size); - std::generate(res.begin(), res.end(), - [&gen, &nd] { - return nd(gen); - }); + std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); } -template +template unique_ptr CreateTensor(const std::vector &shape, const std::vector &data) { unique_ptr res(new Tensor(cpu_allocator(), 
@@ -237,23 +236,23 @@ inline std::string ShapeToString(const Tensor &x) {
   return std::string(stream.str());
 }
 
-template<typename T>
+template <typename T>
 struct is_floating_point_type {
   static const bool value = std::is_same<T, float>::value ||
                             std::is_same<T, double>::value;
 };
 
-template<typename T>
+template <typename T>
 inline void ExpectEqual(const T &a, const T &b) {
   EXPECT_EQ(a, b);
 }
 
-template<>
+template <>
 inline void ExpectEqual(const float &a, const float &b) {
   EXPECT_FLOAT_EQ(a, b);
 }
 
-template<>
+template <>
 inline void ExpectEqual(const double &a, const double &b) {
   EXPECT_DOUBLE_EQ(a, b);
 }
@@ -264,11 +263,11 @@ inline void AssertSameTypeDims(const Tensor &x, const Tensor &y) {
       << "y.shape [ " << ShapeToString(y) << "]";
 }
 
-template<typename T, bool is_fp = is_floating_point_type<T>::value>
+template <typename T, bool is_fp = is_floating_point_type<T>::value>
 struct Expector;
 
 // Partial specialization for float and double.
-template<typename T>
+template <typename T>
 struct Expector<T, true> {
   static void Equal(const T &a, const T &b) { ExpectEqual(a, b); }
 
@@ -294,17 +293,17 @@ struct Expector<T, true> {
   }
 };
 
-template<typename T>
+template <typename T>
 void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
   static_assert(is_floating_point_type<T>::value,
                 "T is not a floating point type");
   Expector<T>::Near(x, y, abs_err);
 }
 
-template<typename T>
-std::string ToString(const T& input) {
+template <typename T>
+std::string ToString(const T &input) {
   std::stringstream ss;
-  ss<<input;
+  ss << input;
   return ss.str();
 }
diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h
--- a/mace/ops/pooling.h
+++ b/mace/ops/pooling.h
@@ -14,7 +14,7 @@ namespace mace {
 template <DeviceType D, typename T>
 class PoolingOp : public ConvPool2dOpBase<D, T> {
  public:
-  PoolingOp(const OperatorDef& op_def, Workspace* ws)
+  PoolingOp(const OperatorDef &op_def, Workspace *ws)
       : ConvPool2dOpBase<D, T>(op_def, ws),
         kernels_(OperatorBase::GetRepeatedArgument<int>("kernels")),
         pooling_type_(
@@ -22,8 +22,8 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
             "pooling_type", static_cast<int>(AVG)))){};
 
   bool Run() override {
-    const Tensor* input = this->Input(INPUT);
-    Tensor* output = this->Output(OUTPUT);
+    const Tensor *input = this->Input(INPUT);
+    Tensor *output = this->Output(OUTPUT);
 
     std::vector<index_t> output_shape(4);
     std::vector<int> paddings(2);
@@ -34,11 +34,10 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
     filter_shape[2] = kernels_[0];
     filter_shape[3] = kernels_[1];
 
-    kernels::CalcPaddingAndOutputSize(input->shape().data(),
-                                      filter_shape.data(),
-                                      this->dilations_.data(),
-                                      this->strides_.data(), this->padding_,
-                                      output_shape.data(), paddings.data());
+    kernels::CalcPaddingAndOutputSize(
+        input->shape().data(), filter_shape.data(), this->dilations_.data(),
+        this->strides_.data(), this->padding_, output_shape.data(),
+        paddings.data());
     output->Resize(output_shape);
 
     auto pooling_func = kernels::PoolingFunctor<D, T>(
diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc
index 37dbd5b0..bae9bc2e 100644
--- a/mace/ops/pooling_benchmark.cc
+++ b/mace/ops/pooling_benchmark.cc
@@ -56,7 +56,7 @@ static void Pooling(int iters,
       int iters) {                                                     \
     const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;   \
     mace::testing::ItemsProcessed(tot);                                \
-    mace::testing::BytesProcessed(tot*(sizeof(float)));                \
+    mace::testing::BytesProcessed(tot *(sizeof(float)));               \
     Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA,        \
                     PoolingType::PO);                                  \
   }                                                                    \
diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc
index f5e9b599..6c977d59 100644
--- a/mace/ops/pooling_test.cc
+++ b/mace/ops/pooling_test.cc
@@ -15,7 +15,7 @@ class PoolingOpTest : public OpsTestBase {};
 
 TEST_F(PoolingOpTest, MAX_VALID) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -46,7 +46,7 @@ TEST_F(PoolingOpTest, MAX_VALID) {
 
 TEST_F(PoolingOpTest, AVG_VALID) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -77,7 +77,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
 
 TEST_F(PoolingOpTest, MAX_SAME) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -105,7 +105,7 @@ TEST_F(PoolingOpTest, MAX_SAME) {
 
 TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -134,7 +134,7 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
 
 TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -148,9 +148,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
   net.AddIntsArg("dilations", {1, 1});
 
   // Add input data
-  net.AddInputFromArray<float>("Input", {1, 1, 2, 9},
-                               {0, 1, 2, 3, 4, 5, 6, 7, 8,
-                                9, 10, 11, 12, 13, 14, 15, 16, 17});
+  net.AddInputFromArray<float>(
+      "Input", {1, 1, 2, 9},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17});
 
   // Run
   net.RunOp(DeviceType::NEON);
@@ -162,7 +162,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
 
 TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -176,10 +176,10 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
   net.AddIntsArg("dilations", {1, 1});
 
   // Add input data
-  net.AddInputFromArray<float>("Input", {1, 1, 3, 9},
-                               {0, 1, 2, 3, 4, 5, 6, 7, 8,
-                                9, 10, 11, 12, 13, 14, 15, 16, 17,
-                                18, 19, 20, 21, 22, 23, 24, 25, 26});
+  net.AddInputFromArray<float>(
+      "Input", {1, 1, 3, 9},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+       14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26});
 
   // Run
   net.RunOp(DeviceType::NEON);
@@ -191,7 +191,7 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
 
 TEST_F(PoolingOpTest, AVG_k2x2s2x2) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Pooling", "PoolingTest")
       .Input("Input")
       .Output("Output")
@@ -207,15 +207,12 @@ TEST_F(PoolingOpTest, AVG_k2x2s2x2) {
 
   // Add input data
   net.AddInputFromArray<float>(
       "Input", {1, 1, 2, 8},
-      {0, 1, 2, 3, 4, 5, 6, 7,
-       8, 9, 10, 11, 12, 13, 14, 15});
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
 
   // Run
   net.RunOp(DeviceType::NEON);
 
   // Check
-  auto expected = CreateTensor<float>({1, 1, 1, 4},
-                                      {4.5, 6.5, 8.5, 10.5});
+  auto expected = CreateTensor<float>({1, 1, 1, 4}, {4.5, 6.5, 8.5, 10.5});
 
   ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
 }
-
diff --git a/mace/ops/relu.h b/mace/ops/relu.h
index 5f68cca9..654130fa 100644
--- a/mace/ops/relu.h
+++ b/mace/ops/relu.h
@@ -13,17 +13,17 @@ namespace mace {
 template <DeviceType D, class T>
 class ReluOp : public Operator<D, T> {
  public:
-  ReluOp(const OperatorDef& operator_def, Workspace* ws)
+  ReluOp(const OperatorDef &operator_def, Workspace *ws)
       : Operator<D, T>(operator_def, ws) {
-    functor_.max_limit_ =
-        OperatorBase::GetSingleArgument<T>("max_limit", static_cast<T>(-1));
+    functor_.max_limit_ =
+        OperatorBase::GetSingleArgument<T>("max_limit", static_cast<T>(-1));
   }
   bool Run() override {
-    const Tensor* input_tensor = this->inputs_[0];
-    Tensor* output_tensor = this->outputs_[0];
+    const Tensor *input_tensor = this->inputs_[0];
+    Tensor *output_tensor = this->outputs_[0];
     output_tensor->ResizeLike(input_tensor);
-    const T* input = input_tensor->data<T>();
-    T* output = output_tensor->mutable_data<T>();
+    const T *input = input_tensor->data<T>();
+    T *output = output_tensor->mutable_data<T>();
 
     index_t size = input_tensor->size();
 
     functor_(input, output, size);
diff --git a/mace/ops/relu_benchmark.cc b/mace/ops/relu_benchmark.cc
index 4605990e..e25b0b8f 100644
--- a/mace/ops/relu_benchmark.cc
+++ b/mace/ops/relu_benchmark.cc
@@ -36,7 +36,7 @@ static void ReluBenchmark(int iters, int size) {
   static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) {  \
     const int64_t tot = static_cast<int64_t>(iters) * SIZE;    \
     mace::testing::ItemsProcessed(tot);                        \
-    mace::testing::BytesProcessed(tot*(sizeof(TYPE)));         \
+    mace::testing::BytesProcessed(tot *(sizeof(TYPE)));        \
     ReluBenchmark<DEVICE, TYPE>(iters, SIZE);                  \
   }                                                            \
   BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE)
diff --git a/mace/ops/relu_test.cc b/mace/ops/relu_test.cc
index bf4c8100..d930444e 100644
--- a/mace/ops/relu_test.cc
+++ b/mace/ops/relu_test.cc
@@ -11,7 +11,7 @@ class ReluOpTest : public OpsTestBase {};
 
 TEST_F(ReluOpTest, ReluOp) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Relu", "ReluTest")
       .Input("Input")
       .Output("Output")
@@ -34,7 +34,7 @@ TEST_F(ReluOpTest, ReluOp) {
 
 TEST_F(ReluOpTest, ReluOpWithMax) {
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("Relu", "ReluTestWithMax")
       .Input("Input")
       .Output("Output")
@@ -56,5 +56,4 @@ TEST_F(ReluOpTest, ReluOpWithMax) {
 
   ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01);
 }
-
 }  // namespace mace
diff --git a/mace/ops/resize_bilinear.h b/mace/ops/resize_bilinear.h
index 8daa3176..6f85b3f1 100644
--- a/mace/ops/resize_bilinear.h
+++ b/mace/ops/resize_bilinear.h
@@ -13,21 +13,21 @@ namespace mace {
 template <DeviceType D, class T>
 class ResizeBilinearOp : public Operator<D, T> {
  public:
-  ResizeBilinearOp(const OperatorDef& operator_def, Workspace* ws)
+  ResizeBilinearOp(const OperatorDef &operator_def, Workspace *ws)
       : Operator<D, T>(operator_def, ws),
         functor_(
             OperatorBase::GetSingleArgument<bool>("align_corners", false)) {}
 
   bool Run() override {
-    const Tensor* input = this->Input(0);
-    const Tensor* resize_dims = this->Input(1);
+    const Tensor *input = this->Input(0);
+    const Tensor *resize_dims = this->Input(1);
 
     MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional.",
                input->dim_size());
     MACE_CHECK(resize_dims->dim_size() == 1,
                "resize dim must be 2-dimensional.", resize_dims->dim_size());
 
-    Tensor* output = this->Output(0);
+    Tensor *output = this->Output(0);
 
     index_t n = input->dim(0);
     index_t channels = input->dim(1);
@@ -38,8 +38,8 @@ class ResizeBilinearOp : public Operator<D, T> {
     vector<index_t> out_shape{n, channels, out_height, out_width};
     output->Resize(out_shape);
 
-    const T* input_ptr = input->data<T>();
-    T* output_ptr = output->mutable_data<T>();
+    const T *input_ptr = input->data<T>();
+    T *output_ptr = output->mutable_data<T>();
 
     functor_(input_ptr, output_ptr, n, channels, in_height, in_width,
              out_height, out_width);
diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc
index 333d32af..1690e8d0 100644
--- a/mace/ops/resize_bilinear_test.cc
+++ b/mace/ops/resize_bilinear_test.cc
@@ -13,7 +13,7 @@ class ResizeBilinearTest : public OpsTestBase {};
 TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) {
   testing::internal::LogToStderr();
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
       .Input("Input")
       .Input("OutSize")
@@ -38,7 +38,7 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) {
 TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
   testing::internal::LogToStderr();
   // Construct graph
-  auto& net = test_net();
+  auto &net = test_net();
   OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
       .Input("Input")
       .Input("OutSize")
diff --git a/mace/proto/BUILD b/mace/proto/BUILD
index 1ab23234..593fdea5 100644
--- a/mace/proto/BUILD
+++ b/mace/proto/BUILD
@@ -33,8 +33,8 @@ cc_proto_library(
 py_proto_library(
     name = "mace_py",
     srcs = ["mace.proto"],
+    default_runtime = "@com_google_protobuf//:protobuf_python",
+    protoc = "@com_google_protobuf//:protoc",
     srcs_version = "PY2AND3",
     deps = ["@com_google_protobuf//:protobuf_python"],
-    protoc = "@com_google_protobuf//:protoc",
-    default_runtime = "@com_google_protobuf//:protobuf_python",
-)
\ No newline at end of file
+)
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index ab7af6c2..be756608 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -16,4 +16,3 @@ py_binary(
         "@six_archive//:six",
     ],
 )
-
diff --git a/mace/tools/benchmark/benchmark_model.cc b/mace/tools/benchmark/benchmark_model.cc
index 3b74da18..3cd64402 100644
--- a/mace/tools/benchmark/benchmark_model.cc
+++ b/mace/tools/benchmark/benchmark_model.cc
@@ -3,14 +3,13 @@
 //
 
 #include "mace/core/net.h"
-#include "mace/utils/command_line_flags.h"
 #include "mace/tools/benchmark/stat_summarizer.h"
+#include "mace/utils/command_line_flags.h"
 #include "mace/utils/utils.h"
 
 #include <fstream>
 #include <thread>
 
-
 namespace mace {
 namespace str_util {
 
@@ -29,8 +28,9 @@ std::vector<std::string> Split(const string &str, char delims) {
   return result;
 }
 
-
-bool SplitAndParseToInts(const string &str, char delims, std::vector<index_t>* result) {
+bool SplitAndParseToInts(const string &str,
+                         char delims,
+                         std::vector<index_t> *result) {
   string tmp = str;
   while (!tmp.empty()) {
     index_t dim = atoi(tmp.data());
@@ -44,13 +44,15 @@ bool SplitAndParseToInts(const string &str, char delims, std::vector<index_t>* r
   }
 }
 
-} // namespace str_util
+}  // namespace str_util
 
 namespace benchmark {
 
-bool RunInference(NetBase* net, StatSummarizer* summarizer, int64_t* inference_time_us) {
+bool RunInference(NetBase *net,
+                  StatSummarizer *summarizer,
+                  int64_t *inference_time_us) {
   RunMetadata run_metadata;
-  RunMetadata* run_metadata_ptr = nullptr;
+  RunMetadata *run_metadata_ptr = nullptr;
   if (summarizer) {
     run_metadata_ptr = &run_metadata;
   }
@@ -71,9 +73,13 @@ bool RunInference(NetBase* net, StatSummarizer* summarizer, int64_t* inference_t
   return true;
 }
 
-bool Run(NetBase* net, StatSummarizer* summarizer,
-         int num_runs, double max_time_sec, int64_t sleep_sec,
-         int64_t* total_time_us, int64_t* actual_num_runs) {
+bool Run(NetBase *net,
+         StatSummarizer *summarizer,
+         int num_runs,
+         double max_time_sec,
+         int64_t sleep_sec,
+         int64_t *total_time_us,
+         int64_t *actual_num_runs) {
   *total_time_us = 0;
 
   LOG(INFO) << "Running benchmark for max " << num_runs << " iterators, max "
@@ -85,7 +91,7 @@ bool Run(NetBase* net, StatSummarizer* summarizer,
   Stat<int64_t> stat;
 
   bool util_max_time = (num_runs <= 0);
-  for (int i = 0; util_max_time || i < num_runs ; ++i) {
+  for (int i = 0; util_max_time || i < num_runs; ++i) {
     int64_t inference_time_us = 0;
     bool s = RunInference(net, summarizer, &inference_time_us);
     stat.UpdateStat(inference_time_us);
@@ -113,7 +119,7 @@ bool Run(NetBase* net, StatSummarizer* summarizer,
   return true;
 }
 
-int Main(int argc, char** argv) {
+int Main(int argc, char **argv) {
   std::string model_file = "/data/local/tmp/mobi_mace.pb";
   std::string device = "CPU";
   std::string input_layer_string = "input:0";
@@ -182,8 +188,10 @@ int Main(int argc, char **argv) {
     return -1;
   }
 
-  std::vector<string> input_layers = str_util::Split(input_layer_string, ',');
-  std::vector<string> input_layer_shapes = str_util::Split(input_layer_shape_string, ':');
+  std::vector<string> input_layers =
+      str_util::Split(input_layer_string, ',');
+  std::vector<string> input_layer_shapes =
+      str_util::Split(input_layer_shape_string, ':');
   std::vector<string> input_layer_types =
       str_util::Split(input_layer_type_string, ',');
   std::vector<string> input_layer_files =
@@ -260,17 +268,17 @@ int Main(int argc, char **argv) {
   ws.LoadModelTensor(net_def, DeviceType::CPU);
 
   // Load inputs
   for (size_t i = 0; i < inputs_count; ++i) {
-    Tensor *input_tensor = ws.CreateTensor(input_layers[i],
-                                           cpu_allocator(), DT_FLOAT);
+    Tensor *input_tensor =
+        ws.CreateTensor(input_layers[i], cpu_allocator(), DT_FLOAT);
     vector<index_t> shapes;
     str_util::SplitAndParseToInts(input_layer_shapes[i], ',', &shapes);
     input_tensor->Resize(shapes);
     float *input_data = input_tensor->mutable_data<float>();
-
     // load input
     if (i < input_layer_files.size()) {
-      std::ifstream in_file(input_layer_files[i], std::ios::in | std::ios::binary);
+      std::ifstream in_file(input_layer_files[i],
+                            std::ios::in | std::ios::binary);
       in_file.read(reinterpret_cast<char *>(input_data),
                    input_tensor->size() * sizeof(float));
       in_file.close();
@@ -285,31 +293,31 @@ int Main(int argc, char **argv) {
   int64_t warmup_time_us = 0;
   int64_t num_warmup_runs = 0;
   if (warmup_runs > 0) {
-    bool status = Run(net.get(), nullptr,
-                      warmup_runs, -1.0, inter_inference_sleep_seconds,
-                      &warmup_time_us, &num_warmup_runs);
+    bool status =
+        Run(net.get(), nullptr, warmup_runs, -1.0,
+            inter_inference_sleep_seconds, &warmup_time_us, &num_warmup_runs);
     if (!status) {
       LOG(ERROR) << "Failed at warm up run";
    }
  }
 
   if (inter_benchmark_sleep_seconds > 0) {
-    std::this_thread::sleep_for(std::chrono::seconds(inter_benchmark_sleep_seconds));
+    std::this_thread::sleep_for(
+        std::chrono::seconds(inter_benchmark_sleep_seconds));
   }
   int64_t no_stat_time_us = 0;
   int64_t no_stat_runs = 0;
-  bool status = Run(net.get(), nullptr,
-                    max_num_runs, max_benchmark_time_seconds, inter_inference_sleep_seconds,
-                    &no_stat_time_us, &no_stat_runs);
+  bool status =
+      Run(net.get(), nullptr, max_num_runs, max_benchmark_time_seconds,
+          inter_inference_sleep_seconds, &no_stat_time_us, &no_stat_runs);
   if (!status) {
     LOG(ERROR) << "Failed at normal no-stat run";
   }
   int64_t stat_time_us = 0;
   int64_t stat_runs = 0;
-  status = Run(net.get(), stats.get(),
-               max_num_runs, max_benchmark_time_seconds, inter_inference_sleep_seconds,
-               &stat_time_us, &stat_runs);
+  status = Run(net.get(), stats.get(), max_num_runs, max_benchmark_time_seconds,
+               inter_inference_sleep_seconds, &stat_time_us, &stat_runs);
   if (!status) {
     LOG(ERROR) << "Failed at normal stat run";
   }
@@ -325,9 +333,7 @@ int Main(int argc, char **argv) {
   return 0;
 }
 
-} // namespace benchmark
-} // namespace mace
+}  // namespace benchmark
+}  // namespace mace
 
-int main (int argc, char** argv) {
-  mace::benchmark::Main(argc, argv);
-}
+int main(int argc, char **argv) { mace::benchmark::Main(argc, argv); }
diff --git a/mace/tools/benchmark/stat_summarizer.cc b/mace/tools/benchmark/stat_summarizer.cc
index ff3d504b..fd0e820a 100644
--- a/mace/tools/benchmark/stat_summarizer.cc
+++ b/mace/tools/benchmark/stat_summarizer.cc
@@ -2,17 +2,16 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
-#include "mace/core/common.h"
 #include "mace/tools/benchmark/stat_summarizer.h"
+#include "mace/core/common.h"
 #include "mace/proto/stats.pb.h"
 
 #include <iomanip>
 #include <queue>
 
-
 namespace mace {
 
-StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
+StatSummarizer::StatSummarizer(const StatSummarizerOptions &options)
     : options_(options) {}
 
 StatSummarizer::~StatSummarizer() {}
@@ -23,17 +22,14 @@ void StatSummarizer::Reset() {
   details_.clear();
 }
 
-
 void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) {
   int64_t curr_total_us = 0;
   int64_t mem_total = 0;
 
-  int64_t first_node_start_us =
-      run_metadata.op_stats(0).all_start_micros();
+  int64_t first_node_start_us = run_metadata.op_stats(0).all_start_micros();
 
   int node_num = 0;
-  for (const auto& ops : run_metadata.op_stats()) {
-
+  for (const auto &ops : run_metadata.op_stats()) {
     std::string name = ops.operator_name();
     std::string op_type = ops.type();
 
@@ -41,7 +37,7 @@ void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) {
     const int64_t curr_time = ops.all_end_rel_micros();
     curr_total_us += curr_time;
     auto result = details_.emplace(name, Detail());
-    Detail* detail = &(result.first->second);
+    Detail *detail = &(result.first->second);
 
     detail->start_us.UpdateStat(ops.all_start_micros() - first_node_start_us);
     detail->rel_end_us.UpdateStat(curr_time);
@@ -77,13 +73,13 @@ std::string StatSummarizer::ShortSummary() const {
   return stream.str();
 }
 
-std::ostream& InitField(std::ostream& stream, int width) {
+std::ostream &InitField(std::ostream &stream, int width) {
   stream << "\t" << std::right << std::setw(width) << std::fixed
          << std::setprecision(3);
   return stream;
 }
 
-std::string StatSummarizer::HeaderString(const std::string& title) const {
+std::string StatSummarizer::HeaderString(const std::string &title) const {
   std::stringstream stream;
 
   stream << "============================== " << title
@@ -102,9 +98,9 @@ std::string StatSummarizer::HeaderString(const std::string& title) const {
   return stream.str();
 }
 
-std::string StatSummarizer::ColumnString(const StatSummarizer::Detail& detail,
+std::string StatSummarizer::ColumnString(const StatSummarizer::Detail &detail,
                                          const int64_t cumulative_stat_on_node,
-                                         const Stat<int64_t>& stat) const {
+                                         const Stat<int64_t> &stat) const {
   const double start_ms = detail.start_us.avg() / 1000.0;
   const double first_time_ms = detail.rel_end_us.first() / 1000.0;
   const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
@@ -127,12 +123,12 @@ std::string StatSummarizer::ColumnString(const StatSummarizer::Detail& detail,
 }
 
 void StatSummarizer::OrderNodesByMetric(
-    SortingMetric metric, std::vector<const Detail*>* details) const {
-  std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
+    SortingMetric metric, std::vector<const Detail *> *details) const {
+  std::priority_queue<std::pair<std::string, const Detail *>> sorted_list;
   const int num_nodes = details_.size();
 
-  for (const auto& det : details_) {
-    const Detail* detail = &(det.second);
+  for (const auto &det : details_) {
+    const Detail *detail = &(det.second);
     std::stringstream stream;
     stream << std::setw(20) << std::right << std::setprecision(10)
            << std::fixed;
@@ -169,16 +165,16 @@ void StatSummarizer::OrderNodesByMetric(
 }
 
 void StatSummarizer::ComputeStatsByType(
-    std::map<std::string, int64_t>* node_type_map_count,
-    std::map<std::string, int64_t>* node_type_map_time,
-    std::map<std::string, int64_t>* node_type_map_memory,
-    std::map<std::string, int64_t>* node_type_map_times_called,
-    int64_t* accumulated_us) const {
+    std::map<std::string, int64_t> *node_type_map_count,
+    std::map<std::string, int64_t> *node_type_map_time,
+    std::map<std::string, int64_t> *node_type_map_memory,
+    std::map<std::string, int64_t> *node_type_map_times_called,
+    int64_t *accumulated_us) const {
   int64_t run_count = run_total_us_.count();
 
-  for (const auto& det : details_) {
+  for (const auto &det : details_) {
     const std::string node_name = det.first;
-    const Detail& detail = det.second;
+    const Detail &detail = det.second;
 
     int64_t curr_time_val =
         static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
@@ -186,7 +182,7 @@ void StatSummarizer::ComputeStatsByType(
 
     int64_t curr_memory_val = detail.mem_used.newest();
 
-    const std::string& node_type = detail.type;
+    const std::string &node_type = detail.type;
 
     (*node_type_map_count)[node_type] += 1;
     (*node_type_map_time)[node_type] += curr_time_val;
@@ -215,8 +211,9 @@ std::string StatSummarizer::GetStatsByNodeType() const {
                      &accumulated_us);
 
   // Sort them.
-  std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>> timings;
-  for (const auto& node_type : node_type_map_time) {
+  std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
+      timings;
+  for (const auto &node_type : node_type_map_time) {
     const int64_t mem_used = node_type_map_memory[node_type.first];
     timings.emplace(node_type.second,
                     std::pair<std::string, int64_t>(node_type.first, mem_used));
@@ -259,10 +256,10 @@ std::string StatSummarizer::GetStatsByNodeType() const {
   return stream.str();
 }
 
-std::string StatSummarizer::GetStatsByMetric(const std::string& title,
+std::string StatSummarizer::GetStatsByMetric(const std::string &title,
                                              SortingMetric sorting_metric,
                                              int num_stats) const {
-  std::vector<const Detail*> details;
+  std::vector<const Detail *> details;
   OrderNodesByMetric(sorting_metric, &details);
 
   double cumulative_stat_on_node = 0;
diff --git a/mace/tools/benchmark/stat_summarizer.h b/mace/tools/benchmark/stat_summarizer.h
index c8604e24..e58a6f09 100644
--- a/mace/tools/benchmark/stat_summarizer.h
+++ b/mace/tools/benchmark/stat_summarizer.h
@@ -12,7 +12,6 @@
 #include <map>
 #include <string>
 
-
 namespace mace {
 
 class RunMetadata;
@@ -62,7 +61,7 @@ class Stat {
     return all_same() ? 0 : std::sqrt(squared_sum_ / count_ - avg() * avg());
   }
 
-  void OutputToStream(std::ostream* stream) const {
+  void OutputToStream(std::ostream *stream) const {
     if (empty()) {
       *stream << "count=0";
     } else if (all_same()) {
@@ -75,8 +74,8 @@ class Stat {
     }
   }
 
-  friend std::ostream& operator<<(std::ostream& stream,
-                                  const Stat& stat) {
+  friend std::ostream &operator<<(std::ostream &stream,
+                                  const Stat &stat) {
     stat.OutputToStream(&stream);
     return stream;
  }
@@ -131,12 +130,12 @@ class StatSummarizer {
     BY_TYPE,
   };
 
-  explicit StatSummarizer(const StatSummarizerOptions& options);
+  explicit StatSummarizer(const StatSummarizerOptions &options);
 
   ~StatSummarizer();
 
   // Adds another run's StepStats output to the aggregate counts.
-  void ProcessMetadata(const RunMetadata& run_metadata);
+  void ProcessMetadata(const RunMetadata &run_metadata);
 
   // Returns a string detailing the accumulated runtime stats in a tab-separated
   // format which can be pasted into a spreadsheet for further analysis.
@@ -147,15 +146,16 @@ class StatSummarizer {
 
   // Prints the string returned by GetOutputString().
   void PrintOperatorStats() const;
 
-  void ComputeStatsByType(std::map<std::string, int64_t>* node_type_map_count,
-                          std::map<std::string, int64_t>* node_type_map_time,
-                          std::map<std::string, int64_t>* node_type_map_memory,
-                          std::map<std::string, int64_t>* node_type_map_times_called,
-                          int64_t* accumulated_us) const;
+  void ComputeStatsByType(
+      std::map<std::string, int64_t> *node_type_map_count,
+      std::map<std::string, int64_t> *node_type_map_time,
+      std::map<std::string, int64_t> *node_type_map_memory,
+      std::map<std::string, int64_t> *node_type_map_times_called,
+      int64_t *accumulated_us) const;
 
   std::string GetStatsByNodeType() const;
 
-  std::string GetStatsByMetric(const std::string& title,
+  std::string GetStatsByMetric(const std::string &title,
                                SortingMetric sorting_metric,
                                int num_stats) const;
 
@@ -165,7 +165,7 @@ class StatSummarizer {
   int num_runs() const { return run_total_us_.count(); }
 
   // Returns stats of total microseconds spent by all nodes in each run.
-  const Stat<int64_t>& run_total_us() const { return run_total_us_; }
+  const Stat<int64_t> &run_total_us() const { return run_total_us_; }
 
  private:
  struct Detail {
@@ -179,12 +179,12 @@ class StatSummarizer {
   };
 
   void OrderNodesByMetric(SortingMetric sorting_metric,
-                          std::vector<const Detail*>* details) const;
+                          std::vector<const Detail *> *details) const;
 
-  std::string HeaderString(const std::string& title) const;
-  std::string ColumnString(const Detail& detail,
+  std::string HeaderString(const std::string &title) const;
+  std::string ColumnString(const Detail &detail,
                            const int64_t cumulative_stat_on_node,
-                           const Stat<int64_t>& stat) const;
+                           const Stat<int64_t> &stat) const;
 
   Stat<int64_t> run_total_us_;
   Stat<int64_t> memory_;
@@ -193,6 +193,6 @@ class StatSummarizer {
   StatSummarizerOptions options_;
 };
 
-} // namespace mace
+}  // namespace mace
 
 #endif  // MACE_TOOLS_BENCHMARK_STAT_SUMMARIZER_H_
diff --git a/mace/utils/command_line_flags.cc b/mace/utils/command_line_flags.cc
index d9a249b8..146ead01 100644
--- a/mace/utils/command_line_flags.cc
+++ b/mace/utils/command_line_flags.cc
@@ -10,19 +10,21 @@ namespace mace {
 namespace {
 
 bool StringConsume(string &arg, const string &x) {
-  if ((arg.size() >= x.size())
-      && (memcmp(arg.data(), x.data(), x.size()) == 0)) {
+  if ((arg.size() >= x.size()) &&
+      (memcmp(arg.data(), x.data(), x.size()) == 0)) {
     arg = arg.substr(x.size());
     return true;
   }
 
   return false;
 }
 
-bool ParseStringFlag(string arg, string flag,
-                     string *dst, bool *value_parsing_ok) {
+bool ParseStringFlag(string arg,
+                     string flag,
+                     string *dst,
+                     bool *value_parsing_ok) {
   *value_parsing_ok = true;
-  if (StringConsume(arg, "--") && StringConsume(arg, flag)
-      && StringConsume(arg, "=")) {
+  if (StringConsume(arg, "--") && StringConsume(arg, flag) &&
+      StringConsume(arg, "=")) {
     *dst = arg;
     return true;
   }
@@ -30,11 +32,13 @@ bool ParseStringFlag(string arg, string flag,
   return false;
 }
 
-bool ParseInt32Flag(string arg, string flag,
-                    int32_t *dst, bool *value_parsing_ok) {
+bool ParseInt32Flag(string arg,
+                    string flag,
+                    int32_t *dst,
+                    bool *value_parsing_ok) {
   *value_parsing_ok = true;
-  if (StringConsume(arg, "--") && StringConsume(arg, flag)
-      && StringConsume(arg, "=")) {
+  if (StringConsume(arg, "--") && StringConsume(arg, flag) &&
+      StringConsume(arg, "=")) {
     char extra;
     if (sscanf(arg.data(), "%d%c", dst, &extra) != 1) {
       LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
@@ -47,11 +51,13 @@ bool ParseInt32Flag(string arg, string flag,
   return false;
 }
 
-bool ParseInt64Flag(string arg, string flag,
-                    long long *dst, bool *value_parsing_ok) {
+bool ParseInt64Flag(string arg,
+                    string flag,
+                    long long *dst,
+                    bool *value_parsing_ok) {
   *value_parsing_ok = true;
-  if (StringConsume(arg, "--") && StringConsume(arg, flag)
-      && StringConsume(arg, "=")) {
+  if (StringConsume(arg, "--") && StringConsume(arg, flag) &&
+      StringConsume(arg, "=")) {
     char extra;
     if (sscanf(arg.data(), "%lld%c", dst, &extra) != 1) {
       LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
@@ -64,8 +70,7 @@ bool ParseInt64Flag(string arg, string flag,
   return false;
 }
 
-bool ParseBoolFlag(string arg, string flag,
-                   bool *dst, bool *value_parsing_ok) {
+bool ParseBoolFlag(string arg, string flag, bool *dst, bool *value_parsing_ok) {
   *value_parsing_ok = true;
   if (StringConsume(arg, "--") && StringConsume(arg, flag)) {
     if (arg.empty()) {
@@ -90,11 +95,13 @@ bool ParseBoolFlag(string arg, string flag,
   return false;
 }
 
-bool ParseFloatFlag(string arg, string flag,
-                    float *dst, bool *value_parsing_ok) {
+bool ParseFloatFlag(string arg,
+                    string flag,
+                    float *dst,
+                    bool *value_parsing_ok) {
   *value_parsing_ok = true;
-  if (StringConsume(arg, "--") && StringConsume(arg, flag)
-      && StringConsume(arg, "=")) {
+  if (StringConsume(arg, "--") && StringConsume(arg, flag) &&
+      StringConsume(arg, "=")) {
     char extra;
     if (sscanf(arg.data(), "%f%c", dst, &extra) != 1) {
       LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
@@ -152,7 +159,8 @@ bool Flag::Parse(string arg, bool *value_parsing_ok) const {
   return result;
 }
 
-/*static*/ bool Flags::Parse(int *argc, char **argv,
+/*static*/ bool Flags::Parse(int *argc,
+                             char **argv,
                              const std::vector<Flag> &flag_list) {
   bool result = true;
   std::vector<string> unknown_flags;
diff --git a/mace/utils/command_line_flags.h b/mace/utils/command_line_flags.h
index 0d3daf28..48eea0b4 100644
--- a/mace/utils/command_line_flags.h
+++ b/mace/utils/command_line_flags.h
@@ -39,16 +39,14 @@ class Flags {
   // with matching flags, and remove the matching arguments from (*argc, argv).
   // Return true iff all recognized flag values were parsed correctly, and the
   // first remaining argument is not "--help".
-  static bool Parse(int *argc,
-                    char **argv,
-                    const std::vector<Flag> &flag_list);
+  static bool Parse(int *argc, char **argv, const std::vector<Flag> &flag_list);
 
   // Return a usage message with command line cmdline, and the
   // usage_text strings in flag_list[].
   static string Usage(const string &cmdline,
-                      const std::vector<Flag> &flag_list);
+                      const std::vector<Flag> &flag_list);
 };
 
-} // namespace mace
+}  // namespace mace
 
-#endif // MACE_CORE_COMMAND_LINE_FLAGS_H
+#endif  // MACE_CORE_COMMAND_LINE_FLAGS_H
diff --git a/mace/utils/utils.h b/mace/utils/utils.h
index 1c075632..3fb90074 100644
--- a/mace/utils/utils.h
+++ b/mace/utils/utils.h
@@ -24,5 +24,5 @@ inline int64_t NowInMicroSec() {
   return static_cast<int64_t>(tv.tv_sec * 1000000 + tv.tv_usec);
 }
 
-} // namespace mace
-#endif // MACE_UTILS_UTILS_H_
+}  // namespace mace
+#endif  // MACE_UTILS_UTILS_H_
-- 
GitLab