From 4410ecd239caf53de680d04557620781cc9e8953 Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Wed, 7 Mar 2018 21:47:25 +0800 Subject: [PATCH] Reformatting code and enable cpplint --- .gitlab-ci.yml | 9 + mace/core/allocator.h | 4 +- mace/core/arg_helper.cc | 4 +- mace/core/buffer.h | 143 +++--- mace/core/mace.cc | 425 ++++++------------ mace/core/net.cc | 14 +- mace/core/operator.h | 4 +- mace/core/preallocated_pooled_allocator.h | 4 +- .../hexagon/hexagon_control_wrapper.cc | 152 +++---- .../runtime/hexagon/hexagon_control_wrapper.h | 18 +- .../hexagon/hexagon_controller_dummy.cc | 166 +++++-- mace/core/runtime/hexagon/hexagon_nn.h | 202 ++++++--- mace/core/runtime/hexagon/hexagon_nn_ops.h | 10 +- mace/core/runtime/hexagon/ops.h | 1 - mace/core/runtime/hexagon/quantize.cc | 10 +- mace/core/runtime/hexagon/quantize.h | 18 +- mace/core/runtime/opencl/opencl_allocator.cc | 27 +- .../core/runtime/opencl/opencl_development.cc | 5 +- mace/core/runtime/opencl/opencl_production.cc | 3 +- mace/core/runtime/opencl/opencl_runtime.cc | 37 +- mace/core/runtime/opencl/opencl_runtime.h | 4 +- mace/core/runtime/opencl/opencl_wrapper.h | 8 +- mace/core/tensor.h | 95 ++-- mace/core/testing/test_benchmark.cc | 4 +- mace/core/testing/test_benchmark.h | 2 +- mace/core/types.cc | 20 +- mace/core/workspace.cc | 75 ++-- mace/core/workspace.h | 4 +- mace/kernels/activation.h | 22 +- mace/kernels/addn.h | 2 +- mace/kernels/batch_norm.h | 10 +- mace/kernels/bias_add.h | 4 +- mace/kernels/buffer_to_image.h | 14 +- mace/kernels/channel_shuffle.h | 6 +- mace/kernels/concat.h | 20 +- mace/kernels/conv_2d.h | 12 +- mace/kernels/conv_pool_2d_util.cc | 31 +- mace/kernels/conv_pool_2d_util.h | 2 +- mace/kernels/depthwise_conv2d.h | 9 +- mace/kernels/eltwise.h | 25 +- mace/kernels/fully_connected.h | 18 +- mace/kernels/global_avg_pooling.h | 6 +- mace/kernels/matmul.h | 5 +- mace/kernels/neon/batch_norm_neon.cc | 3 +- mace/kernels/neon/conv_2d_neon.cc | 9 +- mace/kernels/neon/conv_2d_neon_3x3.cc | 12 +- mace/kernels/neon/depthwise_conv_neon.cc | 9 +- mace/kernels/opencl/activation_opencl.cc | 7 +- mace/kernels/opencl/addn.cc | 24 +- mace/kernels/opencl/batch_norm_opencl.cc | 9 +- mace/kernels/opencl/bias_add_opencl.cc | 21 +- mace/kernels/opencl/buffer_to_image.cc | 54 +-- mace/kernels/opencl/cl/common.h | 4 +- mace/kernels/opencl/concat.cc | 53 +-- mace/kernels/opencl/conv_2d_opencl.cc | 37 +- mace/kernels/opencl/conv_2d_opencl_1x1.cc | 16 +- mace/kernels/opencl/conv_2d_opencl_3x3.cc | 15 +- mace/kernels/opencl/conv_2d_opencl_general.cc | 15 +- mace/kernels/opencl/depthwise_conv_opencl.cc | 23 +- mace/kernels/opencl/eltwise_opencl.cc | 20 +- mace/kernels/opencl/fully_connected_opencl.cc | 35 +- mace/kernels/opencl/helper.cc | 88 ++-- mace/kernels/opencl/helper.h | 13 +- mace/kernels/opencl/matmul.cc | 27 +- mace/kernels/opencl/pooling_opencl.cc | 27 +- mace/kernels/opencl/resize_bilinear_opencl.cc | 20 +- mace/kernels/opencl/softmax_opencl.cc | 17 +- mace/kernels/opencl/space_to_batch_opencl.cc | 31 +- mace/kernels/opencl/winograd_transform.cc | 101 +++-- mace/kernels/pooling.h | 51 ++- mace/kernels/reshape.h | 3 +- mace/kernels/resize_bilinear.h | 21 +- mace/kernels/space_to_batch.h | 22 +- mace/kernels/winograd_transform.h | 44 +- mace/ops/activation.h | 3 +- mace/ops/activation_test.cc | 7 +- mace/ops/addn.h | 10 +- mace/ops/addn_benchmark.cc | 3 +- mace/ops/batch_norm_benchmark.cc | 2 +- mace/ops/batch_to_space.h | 27 +- mace/ops/batch_to_space_benchmark.cc | 2 +- mace/ops/bias_add_benchmark.cc | 2 +- mace/ops/buffer_to_image.h | 9 +- mace/ops/buffer_to_image_test.cc | 71 +-- mace/ops/channel_shuffle.h | 4 +- mace/ops/channel_shuffle_benchmark.cc | 2 +- mace/ops/concat.h | 5 +- mace/ops/concat_benchmark.cc | 19 +- mace/ops/concat_test.cc | 8 +- mace/ops/conv_2d_test.cc | 104 ++--- mace/ops/eltwise.h | 10 +- mace/ops/eltwise_benchmark.cc | 2 +- mace/ops/eltwise_test.cc | 96 ++-- mace/ops/folded_batch_norm.cc | 27 +- mace/ops/folded_batch_norm_test.cc | 2 +- mace/ops/fully_connected.h | 9 +- mace/ops/fully_connected_benchmark.cc | 28 +- mace/ops/fully_connected_test.cc | 98 ++-- mace/ops/fused_conv_2d_test.cc | 40 +- mace/ops/global_avg_pooling.h | 2 +- mace/ops/global_avg_pooling_benchmark.cc | 4 +- mace/ops/image_to_buffer.h | 9 +- mace/ops/matmul.h | 4 +- mace/ops/matmul_test.cc | 62 ++- mace/ops/ops_test_util.h | 9 +- mace/ops/pooling.h | 8 +- mace/ops/pooling_benchmark.cc | 4 +- mace/ops/pooling_test.cc | 6 +- mace/ops/reshape.h | 8 +- mace/ops/reshape_test.cc | 1 - mace/ops/softmax.cc | 6 +- mace/ops/softmax.h | 5 +- mace/ops/softmax_test.cc | 16 +- mace/ops/space_to_batch.h | 25 +- mace/ops/space_to_batch_benchmark.cc | 2 +- mace/ops/winograd_convolution_test.cc | 36 +- mace/ops/winograd_inverse_transform.h | 4 +- mace/ops/winograd_transform.h | 6 +- mace/ops/winograd_transform_benchmark.cc | 42 +- mace/public/mace.h | 46 +- mace/utils/command_line_flags.h | 2 +- mace/utils/env_time.h | 1 - mace/utils/logging.h | 2 +- mace/utils/string_util.h | 2 +- mace/utils/timer.h | 16 +- mace/utils/tuner_test.cc | 29 +- mace/utils/utils.h | 4 +- mace/utils/utils_test.cc | 29 +- 128 files changed, 1634 insertions(+), 1831 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1144cbc3..d25fa503 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,15 @@ stages: - ops_test - ops_benchmark + - cpplint + +cpplint: + stage: cpplint + only: + - master + script: + - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py + - python cpplint.py --root=mace --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc | grep -vE "half.h") ops_test: stage: ops_test diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 7ab65f0b..eebbb32b 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -9,8 +9,8 @@ #include #include "mace/core/registry.h" -#include "mace/public/mace.h" #include "mace/core/types.h" +#include "mace/public/mace.h" namespace mace { @@ -81,7 +81,7 @@ class CPUAllocator : public Allocator { free(data); }; void *Map(void *buffer, size_t offset, size_t nbytes) const override { - return (char*)buffer + offset; + return (char *)buffer + offset; } void *MapImage(void *buffer, const std::vector &image_shape, diff --git a/mace/core/arg_helper.cc b/mace/core/arg_helper.cc index 41892b7a..8b6d57fb 100644 --- a/mace/core/arg_helper.cc +++ b/mace/core/arg_helper.cc @@ -83,12 +83,12 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false) #define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname, \ enforce_lossless_conversion) \ template <> \ - std::vector ArgumentHelper::GetRepeatedArgument( \ + std::vector ArgumentHelper::GetRepeatedArgument( \ const string &name, const std::vector &default_value) const { \ if (arg_map_.count(name) == 0) { \ return default_value; \ } \ - std::vector values; \ + std::vector values; \ for (const auto &v : arg_map_.at(name).fieldname()) { \ if (enforce_lossless_conversion) { \ auto supportsConversion = \ diff --git a/mace/core/buffer.h b/mace/core/buffer.h index c17c4a1d..38c577a5 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -5,9 +5,9 @@ #ifndef MACE_CORE_BUFFER_H_ #define MACE_CORE_BUFFER_H_ -#include "mace/core/types.h" -#include "mace/core/allocator.h" #include +#include "mace/core/allocator.h" +#include "mace/core/types.h" namespace mace { @@ -39,23 +39,19 @@ class BufferBase { virtual bool OnHost() const = 0; - virtual index_t offset() const { - return 0; - }; + virtual index_t offset() const { return 0; }; - template + template const T *data() const { return reinterpret_cast(raw_data()); } - template + template T *mutable_data() { return reinterpret_cast(raw_mutable_data()); } - index_t size() const { - return size_; - } + index_t size() const { return size_; } protected: index_t size_; @@ -64,26 +60,26 @@ class BufferBase { class Buffer : public BufferBase { public: Buffer(Allocator *allocator) - : BufferBase(0), - allocator_(allocator), - buf_(nullptr), - mapped_buf_(nullptr), - is_data_owner_(true) {} + : BufferBase(0), + allocator_(allocator), + buf_(nullptr), + mapped_buf_(nullptr), + is_data_owner_(true) {} Buffer(Allocator *allocator, index_t size) - : BufferBase(size), - allocator_(allocator), - mapped_buf_(nullptr), - is_data_owner_(true) { + : BufferBase(size), + allocator_(allocator), + mapped_buf_(nullptr), + is_data_owner_(true) { buf_ = allocator->New(size); } Buffer(Allocator *allocator, void *data, index_t size) - : BufferBase(size), - allocator_(allocator), - buf_(data), - mapped_buf_(nullptr), - is_data_owner_(false) {} + : BufferBase(size), + allocator_(allocator), + buf_(data), + mapped_buf_(nullptr), + is_data_owner_(false) {} virtual ~Buffer() { if (mapped_buf_ != nullptr) { @@ -155,12 +151,10 @@ class Buffer : public BufferBase { void Copy(void *src, index_t offset, index_t length) { MACE_CHECK_NOTNULL(mapped_buf_); MACE_CHECK(length <= size_, "out of buffer"); - memcpy(mapped_buf_, (char *) src + offset, length); + memcpy(mapped_buf_, (char *)src + offset, length); } - bool OnHost() const { - return allocator_->OnHost(); - } + bool OnHost() const { return allocator_->OnHost(); } private: Allocator *allocator_; @@ -168,23 +162,24 @@ class Buffer : public BufferBase { void *mapped_buf_; bool is_data_owner_; - DISABLE_COPY_AND_ASSIGN(Buffer); + DISABLE_COPY_AND_ASSIGN(Buffer); }; class Image : public BufferBase { public: Image() - : BufferBase(0), - allocator_(GetDeviceAllocator(OPENCL)), - buf_(nullptr), - mapped_buf_(nullptr) {} + : BufferBase(0), + allocator_(GetDeviceAllocator(OPENCL)), + buf_(nullptr), + mapped_buf_(nullptr) {} Image(std::vector shape, DataType data_type) - : BufferBase(std::accumulate(shape.begin(), shape.end(), - 1, std::multiplies()) - * GetEnumTypeSize(data_type)), - allocator_(GetDeviceAllocator(OPENCL)), - mapped_buf_(nullptr) { + : BufferBase( + std::accumulate( + shape.begin(), shape.end(), 1, std::multiplies()) * + GetEnumTypeSize(data_type)), + allocator_(GetDeviceAllocator(OPENCL)), + mapped_buf_(nullptr) { shape_ = shape; data_type_ = data_type; buf_ = allocator_->NewImage(shape, data_type); @@ -214,9 +209,7 @@ class Image : public BufferBase { return mapped_buf_; } - std::vector image_shape() const { - return shape_; - } + std::vector image_shape() const { return shape_; } void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_NOT_IMPLEMENTED; @@ -241,17 +234,11 @@ class Image : public BufferBase { mapped_buf_ = nullptr; }; - void Resize(index_t size) { - MACE_NOT_IMPLEMENTED; - } + void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } - void Copy(void *src, index_t offset, index_t length) { - MACE_NOT_IMPLEMENTED; - } + void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; } - bool OnHost() const { - return allocator_->OnHost(); - } + bool OnHost() const { return allocator_->OnHost(); } private: Allocator *allocator_; @@ -260,34 +247,25 @@ class Image : public BufferBase { void *buf_; void *mapped_buf_; - DISABLE_COPY_AND_ASSIGN(Image); + DISABLE_COPY_AND_ASSIGN(Image); }; class BufferSlice : public BufferBase { public: BufferSlice() - : buffer_(nullptr), - mapped_buf_(nullptr), - offset_(0), - length_(0) {} + : buffer_(nullptr), mapped_buf_(nullptr), offset_(0), length_(0) {} BufferSlice(BufferBase *buffer, index_t offset, index_t length) - : BufferBase(buffer->size()), - buffer_(buffer), - mapped_buf_(nullptr), - offset_(offset), - length_(length) { + : BufferBase(buffer->size()), + buffer_(buffer), + mapped_buf_(nullptr), + offset_(offset), + length_(length) { MACE_CHECK(offset >= 0, "buffer slice offset should >= 0"); - MACE_CHECK(offset + length <= size_, - "buffer slice offset + length (", - offset, - " + ", - length, - ") should <= ", - size_); + MACE_CHECK(offset + length <= size_, "buffer slice offset + length (", + offset, " + ", length, ") should <= ", size_); } - BufferSlice(const BufferSlice &other) : BufferSlice(other.buffer_, - other.offset_, - other.length_) {} + BufferSlice(const BufferSlice &other) + : BufferSlice(other.buffer_, other.offset_, other.length_) {} ~BufferSlice() { if (buffer_ != nullptr && mapped_buf_ != nullptr) { @@ -303,7 +281,7 @@ class BufferSlice : public BufferBase { const void *raw_data() const { if (OnHost()) { MACE_CHECK_NOTNULL(buffer_); - return (char *) buffer_->raw_data() + offset_; + return (char *)buffer_->raw_data() + offset_; } else { MACE_CHECK_NOTNULL(mapped_buf_); return mapped_buf_; @@ -320,9 +298,7 @@ class BufferSlice : public BufferBase { return nullptr; } - void UnMap(void *mapped_ptr) const { - MACE_NOT_IMPLEMENTED; - } + void UnMap(void *mapped_ptr) const { MACE_NOT_IMPLEMENTED; } void Map(std::vector *pitch) { MACE_CHECK_NOTNULL(buffer_); @@ -336,21 +312,13 @@ class BufferSlice : public BufferBase { mapped_buf_ = nullptr; }; - void Resize(index_t size) { - MACE_NOT_IMPLEMENTED; - } + void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } - void Copy(void *src, index_t offset, index_t length) { - MACE_NOT_IMPLEMENTED; - } + void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; } - index_t offset() const { - return offset_; - } + index_t offset() const { return offset_; } - bool OnHost() const { - return buffer_->OnHost(); - } + bool OnHost() const { return buffer_->OnHost(); } private: BufferBase *buffer_; @@ -358,7 +326,6 @@ class BufferSlice : public BufferBase { index_t offset_; index_t length_; }; - } -#endif // MACE_CORE_BUFFER_H_ +#endif // MACE_CORE_BUFFER_H_ diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 0da8449f..52483181 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -3,9 +3,9 @@ // #include "mace/public/mace.h" -#include "mace/core/types.h" #include "mace/core/net.h" #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#include "mace/core/types.h" namespace mace { @@ -13,46 +13,34 @@ ConstTensor::ConstTensor(const std::string &name, const unsigned char *data, const std::vector &dims, const DataType data_type, - uint32_t node_id) : - name_(name), - data_(data), - data_size_(std::accumulate(dims.begin(), dims.end(), 1, - std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(data_type), - node_id_(node_id) {} + uint32_t node_id) + : name_(name), + data_(data), + data_size_(std::accumulate( + dims.begin(), dims.end(), 1, std::multiplies())), + dims_(dims.begin(), dims.end()), + data_type_(data_type), + node_id_(node_id) {} ConstTensor::ConstTensor(const std::string &name, const unsigned char *data, const std::vector &dims, const int data_type, - uint32_t node_id) : - name_(name), - data_(data), - data_size_(std::accumulate(dims.begin(), dims.end(), 1, - std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(static_cast(data_type)), - node_id_(node_id) {} - -const std::string &ConstTensor::name() const { - return name_; -} -const unsigned char *ConstTensor::data() const { - return data_; -} -int64_t ConstTensor::data_size() const { - return data_size_; -} -const std::vector &ConstTensor::dims() const { - return dims_; -} -DataType ConstTensor::data_type() const { - return data_type_; -} -uint32_t ConstTensor::node_id() const { - return node_id_; -} + uint32_t node_id) + : name_(name), + data_(data), + data_size_(std::accumulate( + dims.begin(), dims.end(), 1, std::multiplies())), + dims_(dims.begin(), dims.end()), + data_type_(static_cast(data_type)), + node_id_(node_id) {} + +const std::string &ConstTensor::name() const { return name_; } +const unsigned char *ConstTensor::data() const { return data_; } +int64_t ConstTensor::data_size() const { return data_size_; } +const std::vector &ConstTensor::dims() const { return dims_; } +DataType ConstTensor::data_type() const { return data_type_; } +uint32_t ConstTensor::node_id() const { return node_id_; } Argument::Argument() : has_bits_(0) {} @@ -73,74 +61,42 @@ void Argument::CopyFrom(const Argument &from) { this->has_bits_ = from.has_bits_; } -const std::string &Argument::name() const { - return name_; -} -void Argument::set_name(const std::string &value) { - name_ = value; -} -bool Argument::has_f() const { - return (has_bits_ & 0x00000001u) != 0; -} -void Argument::set_has_f() { - has_bits_ |= 0x00000001u; -} -float Argument::f() const { - return f_; -} +const std::string &Argument::name() const { return name_; } +void Argument::set_name(const std::string &value) { name_ = value; } +bool Argument::has_f() const { return (has_bits_ & 0x00000001u) != 0; } +void Argument::set_has_f() { has_bits_ |= 0x00000001u; } +float Argument::f() const { return f_; } void Argument::set_f(float value) { set_has_f(); f_ = value; } -bool Argument::has_i() const { - return (has_bits_ & 0x00000002u) != 0; -} -void Argument::set_has_i() { - has_bits_ |= 0x00000002u; -} -int64_t Argument::i() const { - return i_; -} +bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; } +void Argument::set_has_i() { has_bits_ |= 0x00000002u; } +int64_t Argument::i() const { return i_; } void Argument::set_i(int64_t value) { set_has_i(); i_ = value; } -bool Argument::has_s() const { - return (has_bits_ & 0x00000004u) != 0; -} -void Argument::set_has_s() { - has_bits_ |= 0x00000004u; -} -std::string Argument::s() const { - return s_; -} +bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; } +void Argument::set_has_s() { has_bits_ |= 0x00000004u; } +std::string Argument::s() const { return s_; } void Argument::set_s(const std::string &value) { set_has_s(); s_ = value; } -const std::vector &Argument::floats() const { - return floats_; -} -void Argument::add_floats(float value) { - floats_.push_back(value); -} +const std::vector &Argument::floats() const { return floats_; } +void Argument::add_floats(float value) { floats_.push_back(value); } void Argument::set_floats(const std::vector &value) { floats_.resize(value.size()); std::copy(value.begin(), value.end(), floats_.begin()); } -const std::vector &Argument::ints() const { - return ints_; -} -void Argument::add_ints(int64_t value) { - ints_.push_back(value); -} +const std::vector &Argument::ints() const { return ints_; } +void Argument::add_ints(int64_t value) { ints_.push_back(value); } void Argument::set_ints(const std::vector &value) { ints_.resize(value.size()); std::copy(value.begin(), value.end(), ints_.begin()); } -const std::vector &Argument::strings() const { - return strings_; -} +const std::vector &Argument::strings() const { return strings_; } void Argument::add_strings(const ::std::string &value) { strings_.push_back(value); } @@ -156,31 +112,21 @@ void NodeInput::CopyFrom(const NodeInput &from) { node_id_ = from.node_id(); output_port_ = from.output_port(); } -int NodeInput::node_id() const { - return node_id_; -} -void NodeInput::set_node_id(int node_id) { - node_id_ = node_id; -} -int NodeInput::output_port() const { - return output_port_; -} -void NodeInput::set_output_port(int output_port) { - output_port_ = output_port; -} +int NodeInput::node_id() const { return node_id_; } +void NodeInput::set_node_id(int node_id) { node_id_ = node_id; } +int NodeInput::output_port() const { return output_port_; } +void NodeInput::set_output_port(int output_port) { output_port_ = output_port; } // OutputShape OutputShape::OutputShape() {} -OutputShape::OutputShape(const std::vector &dims) : - dims_(dims.begin(), dims.end()) {} +OutputShape::OutputShape(const std::vector &dims) + : dims_(dims.begin(), dims.end()) {} void OutputShape::CopyFrom(const OutputShape &from) { auto from_dims = from.dims(); dims_.resize(from_dims.size()); std::copy(from_dims.begin(), from_dims.end(), dims_.begin()); } -const std::vector &OutputShape::dims() const { - return dims_; -} +const std::vector &OutputShape::dims() const { return dims_; } // Operator Def void OperatorDef::CopyFrom(const OperatorDef &from) { @@ -220,68 +166,38 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { } auto from_out_max_byte_size = from.out_max_byte_size(); out_max_byte_size_.resize(from_out_max_byte_size.size()); - std::copy(from_out_max_byte_size.begin(), - from_out_max_byte_size.end(), + std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(), out_max_byte_size_.begin()); has_bits_ = from.has_bits_; - } -const std::string &OperatorDef::name() const { - return name_; -} +const std::string &OperatorDef::name() const { return name_; } void OperatorDef::set_name(const std::string &name_) { set_has_name(); OperatorDef::name_ = name_; } -bool OperatorDef::has_name() const { - return (has_bits_ & 0x00000001u) != 0; -} -void OperatorDef::set_has_name() { - has_bits_ |= 0x00000001u; -} -const std::string &OperatorDef::type() const { - return type_; -} +bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } +void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; } +const std::string &OperatorDef::type() const { return type_; } void OperatorDef::set_type(const std::string &type_) { set_has_type(); OperatorDef::type_ = type_; } -bool OperatorDef::has_type() const { - return (has_bits_ & 0x00000002u) != 0; -} -void OperatorDef::set_has_type() { - has_bits_ |= 0x00000002u; -} -int OperatorDef::mem_id() const { - return mem_id_; -} +bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; } +void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; } +int OperatorDef::mem_id() const { return mem_id_; } void OperatorDef::set_mem_id(const int mem_id) { set_has_mem_id(); mem_id_ = mem_id; } -bool OperatorDef::has_mem_id() const { - return (has_bits_ & 0x00000004u) != 0; -} -void OperatorDef::set_has_mem_id() { - has_bits_ |= 0x00000004u; -} -uint32_t OperatorDef::node_id() const { - return node_id_; -} -void OperatorDef::set_node_id(uint32_t node_id) { - node_id_ = node_id; -} -uint32_t OperatorDef::op_id() const { - return op_id_; -} -uint32_t OperatorDef::padding() const { - return padding_; -} -void OperatorDef::set_padding(uint32_t padding) { - padding_ = padding; -} +bool OperatorDef::has_mem_id() const { return (has_bits_ & 0x00000004u) != 0; } +void OperatorDef::set_has_mem_id() { has_bits_ |= 0x00000004u; } +uint32_t OperatorDef::node_id() const { return node_id_; } +void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; } +uint32_t OperatorDef::op_id() const { return op_id_; } +uint32_t OperatorDef::padding() const { return padding_; } +void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; } const std::vector &OperatorDef::node_input() const { return node_input_; } @@ -294,9 +210,7 @@ const std::vector &OperatorDef::out_max_byte_size() const { void OperatorDef::add_out_max_byte_size(int value) { out_max_byte_size_.push_back(value); } -const std::vector &OperatorDef::input() const { - return input_; -} +const std::vector &OperatorDef::input() const { return input_; } const std::string &OperatorDef::input(int index) const { MACE_CHECK(0 <= index && index <= input_.size()); return input_[index]; @@ -308,16 +222,12 @@ std::string *OperatorDef::add_input() { void OperatorDef::add_input(const ::std::string &value) { input_.push_back(value); } -void OperatorDef::add_input(::std::string &&value) { - input_.push_back(value); -} +void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); } void OperatorDef::set_input(const std::vector &value) { input_.resize(value.size()); std::copy(value.begin(), value.end(), input_.begin()); } -const std::vector &OperatorDef::output() const { - return output_; -} +const std::vector &OperatorDef::output() const { return output_; } const std::string &OperatorDef::output(int index) const { MACE_CHECK(0 <= index && index <= output_.size()); return output_[index]; @@ -336,9 +246,7 @@ void OperatorDef::set_output(const std::vector &value) { output_.resize(value.size()); std::copy(value.begin(), value.end(), output_.begin()); } -const std::vector &OperatorDef::arg() const { - return arg_; -} +const std::vector &OperatorDef::arg() const { return arg_; } Argument *OperatorDef::add_arg() { arg_.emplace_back(Argument()); return &arg_.back(); @@ -358,18 +266,12 @@ void OperatorDef::set_output_type(const std::vector &value) { } // MemoryBlock -MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) : - mem_id_(mem_id), x_(x), y_(y) {} +MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) + : mem_id_(mem_id), x_(x), y_(y) {} -int MemoryBlock::mem_id() const { - return mem_id_; -} -uint32_t MemoryBlock::x() const { - return x_; -} -uint32_t MemoryBlock::y() const { - return y_; -} +int MemoryBlock::mem_id() const { return mem_id_; } +uint32_t MemoryBlock::x() const { return x_; } +uint32_t MemoryBlock::y() const { return y_; } // MemoryArena const std::vector &MemoryArena::mem_block() const { @@ -378,131 +280,69 @@ const std::vector &MemoryArena::mem_block() const { std::vector &MemoryArena::mutable_mem_block() { return mem_block_; } -int MemoryArena::mem_block_size() const { - return mem_block_.size(); -} +int MemoryArena::mem_block_size() const { return mem_block_.size(); } // InputInfo -const std::string &InputInfo::name() const { - return name_; -} -int32_t InputInfo::node_id() const { - return node_id_; -} -int32_t InputInfo::max_byte_size() const { - return max_byte_size_; -} -DataType InputInfo::data_type() const { - return data_type_; -} -const std::vector &InputInfo::dims() const { - return dims_; -} +const std::string &InputInfo::name() const { return name_; } +int32_t InputInfo::node_id() const { return node_id_; } +int32_t InputInfo::max_byte_size() const { return max_byte_size_; } +DataType InputInfo::data_type() const { return data_type_; } +const std::vector &InputInfo::dims() const { return dims_; } // OutputInfo -const std::string &OutputInfo::name() const { - return name_; -} -int32_t OutputInfo::node_id() const { - return node_id_; -} -int32_t OutputInfo::max_byte_size() const { - return max_byte_size_; -} -DataType OutputInfo::data_type() const { - return data_type_; -} -void OutputInfo::set_data_type(DataType data_type) { - data_type_ = data_type; -} -const std::vector &OutputInfo::dims() const { - return dims_; -} -void OutputInfo::set_dims(const std::vector &dims) { - dims_ = dims; -} +const std::string &OutputInfo::name() const { return name_; } +int32_t OutputInfo::node_id() const { return node_id_; } +int32_t OutputInfo::max_byte_size() const { return max_byte_size_; } +DataType OutputInfo::data_type() const { return data_type_; } +void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; } +const std::vector &OutputInfo::dims() const { return dims_; } +void OutputInfo::set_dims(const std::vector &dims) { dims_ = dims; } // NetDef NetDef::NetDef() : has_bits_(0) {} -const std::string &NetDef::name() const { - return name_; -} +const std::string &NetDef::name() const { return name_; } void NetDef::set_name(const std::string &value) { set_has_name(); name_ = value; } -bool NetDef::has_name() const { - return (has_bits_ & 0x00000001u) != 0; -} -void NetDef::set_has_name() { - has_bits_ |= 0x00000001u; -} -const std::string &NetDef::version() const { - return version_; -} +bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } +void NetDef::set_has_name() { has_bits_ |= 0x00000001u; } +const std::string &NetDef::version() const { return version_; } void NetDef::set_version(const std::string &value) { set_has_version(); version_ = value; } -bool NetDef::has_version() const { - return (has_bits_ & 0x00000002u) != 0; -} -void NetDef::set_has_version() { - has_bits_ |= 0x00000002u; -} -const std::vector &NetDef::op() const { - return op_; -} +bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; } +void NetDef::set_has_version() { has_bits_ |= 0x00000002u; } +const std::vector &NetDef::op() const { return op_; } OperatorDef *NetDef::add_op() { op_.emplace_back(OperatorDef()); return &op_.back(); } -std::vector &NetDef::mutable_op() { - return op_; -} -const std::vector &NetDef::arg() const { - return arg_; -} +std::vector &NetDef::mutable_op() { return op_; } +const std::vector &NetDef::arg() const { return arg_; } Argument *NetDef::add_arg() { arg_.emplace_back(Argument()); return &arg_.back(); } -std::vector &NetDef::mutable_arg() { - return arg_; -} -const std::vector &NetDef::tensors() const { - return tensors_; -} -std::vector &NetDef::mutable_tensors() { - return tensors_; -} -const MemoryArena &NetDef::mem_arena() const { - return mem_arena_; -} +std::vector &NetDef::mutable_arg() { return arg_; } +const std::vector &NetDef::tensors() const { return tensors_; } +std::vector &NetDef::mutable_tensors() { return tensors_; } +const MemoryArena &NetDef::mem_arena() const { return mem_arena_; } MemoryArena &NetDef::mutable_mem_arena() { set_has_mem_arena(); return mem_arena_; } -bool NetDef::has_mem_arena() const { - return (has_bits_ & 0x00000004u) != 0; -} -void NetDef::set_has_mem_arena() { - has_bits_ |= 0x00000004u; -} -const std::vector &NetDef::input_info() const { - return input_info_; -} +bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; } +void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; } +const std::vector &NetDef::input_info() const { return input_info_; } const std::vector &NetDef::output_info() const { return output_info_; } -std::vector &NetDef::mutable_output_info() { - return output_info_; -} +std::vector &NetDef::mutable_output_info() { return output_info_; } -int NetDef::op_size() const { - return op_.size(); -} +int NetDef::op_size() const { return op_.size(); } const OperatorDef &NetDef::op(const int idx) const { MACE_CHECK(0 <= idx && idx < op_size()); @@ -510,26 +350,27 @@ const OperatorDef &NetDef::op(const int idx) const { } // Mace Engine -MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) : - op_registry_(new OperatorRegistry()), device_type_(device_type), - ws_(new Workspace()), net_(nullptr), hexagon_controller_(nullptr) { - ws_->CreateTensor("mace_input_node:0", - GetDeviceAllocator(device_type_), +MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) + : op_registry_(new OperatorRegistry()), + device_type_(device_type), + ws_(new Workspace()), + net_(nullptr), + hexagon_controller_(nullptr) { + ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); - ws_->CreateTensor("mace_output_node:0", - GetDeviceAllocator(device_type_), + ws_->CreateTensor("mace_output_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); if (device_type == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); hexagon_controller_->SetDebugLevel( - static_cast(mace::logging::LogMessage::MinVLogLevel())); - int dsp_mode = ArgumentHelper::GetSingleArgument( - *net_def, "dsp_mode", 0); + static_cast(mace::logging::LogMessage::MinVLogLevel())); + int dsp_mode = + ArgumentHelper::GetSingleArgument(*net_def, "dsp_mode", 0); hexagon_controller_->SetGraphMode(dsp_mode); MACE_CHECK(hexagon_controller_->SetupGraph(*net_def), - "hexagon setup graph error"); + "hexagon setup graph error"); if (VLOG_IS_ON(2)) { hexagon_controller_->PrintGraph(); } @@ -537,8 +378,8 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) : ws_->LoadModelTensor(*net_def, device_type); // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), - device_type, NetMode::INIT); + auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, + NetMode::INIT); if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } @@ -548,18 +389,19 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) : MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type, const std::vector &input_nodes, - const std::vector &output_nodes) : - op_registry_(new OperatorRegistry()), device_type_(device_type), - ws_(new Workspace()), net_(nullptr), hexagon_controller_(nullptr) { + const std::vector &output_nodes) + : op_registry_(new OperatorRegistry()), + device_type_(device_type), + ws_(new Workspace()), + net_(nullptr), + hexagon_controller_(nullptr) { for (auto input_name : input_nodes) { ws_->CreateTensor(MakeString("mace_input_node_", input_name, ":0"), - GetDeviceAllocator(device_type_), - DT_FLOAT); + GetDeviceAllocator(device_type_), DT_FLOAT); } for (auto output_name : output_nodes) { ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"), - GetDeviceAllocator(device_type_), - DT_FLOAT); + GetDeviceAllocator(device_type_), DT_FLOAT); } if (device_type == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); @@ -567,8 +409,8 @@ MaceEngine::MaceEngine(const NetDef *net_def, MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); hexagon_controller_->SetDebugLevel( static_cast(mace::logging::LogMessage::MinVLogLevel())); - int dsp_mode = ArgumentHelper::GetSingleArgument( - *net_def, "dsp_mode", 0); + int dsp_mode = + ArgumentHelper::GetSingleArgument(*net_def, "dsp_mode", 0); hexagon_controller_->SetGraphMode(dsp_mode); MACE_CHECK(hexagon_controller_->SetupGraph(*net_def), "hexagon setup graph error"); @@ -579,14 +421,13 @@ MaceEngine::MaceEngine(const NetDef *net_def, ws_->LoadModelTensor(*net_def, device_type); // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), - device_type, NetMode::INIT); + auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, + NetMode::INIT); if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); } - } MaceEngine::~MaceEngine() { if (device_type_ == HEXAGON) { @@ -643,10 +484,11 @@ bool MaceEngine::Run(const float *input, bool MaceEngine::Run(const std::vector &inputs, std::map &outputs, RunMetadata *run_metadata) { - - MACE_CHECK(device_type_ != HEXAGON, "HEXAGON not supports multiple outputs now"); + MACE_CHECK(device_type_ != HEXAGON, + "HEXAGON not supports multiple outputs now"); for (auto input : inputs) { - Tensor *input_tensor = ws_->GetTensor(MakeString("mace_input_node_", input.name, ":0")); + Tensor *input_tensor = + ws_->GetTensor(MakeString("mace_input_node_", input.name, ":0")); input_tensor->Resize(input.shape); { Tensor::MappingGuard input_guard(input_tensor); @@ -658,7 +500,8 @@ bool MaceEngine::Run(const std::vector &inputs, LOG(FATAL) << "Net run failed"; } for (auto output : outputs) { - Tensor *output_tensor = ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); + Tensor *output_tensor = + ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); // save output if (output_tensor != nullptr && output.second != nullptr) { Tensor::MappingGuard output_guard(output_tensor); diff --git a/mace/core/net.cc b/mace/core/net.cc index ce44b951..2439a67f 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -3,9 +3,9 @@ // #include "mace/core/net.h" -#include "mace/utils/utils.h" -#include "mace/utils/timer.h" #include "mace/utils/memory_logging.h" +#include "mace/utils/timer.h" +#include "mace/utils/utils.h" namespace mace { @@ -20,8 +20,7 @@ SerialNet::SerialNet(const std::shared_ptr op_registry, Workspace *ws, DeviceType type, const NetMode mode) - : NetBase(op_registry, net_def, ws, type), - device_type_(type) { + : NetBase(op_registry, net_def, ws, type), device_type_(type) { MACE_LATENCY_LOGGER(1, "Constructing SerialNet ", net_def->name()); for (int idx = 0; idx < net_def->op_size(); ++idx) { const auto &operator_def = net_def->op(idx); @@ -41,8 +40,8 @@ bool SerialNet::Run(RunMetadata *run_metadata) { MACE_LATENCY_LOGGER(1, "Running net"); for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) { auto &op = *iter; - MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name(), - "(", op->debug_def().type(), ")"); + MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name(), "(", + op->debug_def().type(), ")"); bool future_wait = (device_type_ == DeviceType::OPENCL && (run_metadata != nullptr || std::distance(iter, operators_.end()) == 1)); @@ -99,7 +98,8 @@ std::unique_ptr CreateNet( Workspace *ws, DeviceType type, const NetMode mode) { - std::unique_ptr net(new SerialNet(op_registry, net_def, ws, type, mode)); + std::unique_ptr net( + new SerialNet(op_registry, net_def, ws, type, mode)); return net; } diff --git a/mace/core/operator.h b/mace/core/operator.h index 773db3dd..a163c0c8 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -7,10 +7,10 @@ #include "mace/core/arg_helper.h" #include "mace/core/future.h" -#include "mace/public/mace.h" #include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" +#include "mace/public/mace.h" namespace mace { @@ -147,7 +147,7 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) { class OperatorRegistry { public: typedef Registry - RegistryType; + RegistryType; OperatorRegistry(); ~OperatorRegistry() = default; RegistryType *registry() { return ®istry_; }; diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h index 75cf4117..ad0c975a 100644 --- a/mace/core/preallocated_pooled_allocator.h +++ b/mace/core/preallocated_pooled_allocator.h @@ -36,6 +36,6 @@ class PreallocatedPooledAllocator { std::unordered_map> buffers_; }; -} // namespace mace +} // namespace mace -#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_ +#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_ diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index 8735f529..2828ffa0 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -2,19 +2,19 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include -#include #include +#include +#include #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" #include "mace/core/runtime/hexagon/hexagon_nn_ops.h" namespace { - inline int64_t NowMicros() { - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; - } +inline int64_t NowMicros() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +} } namespace mace { @@ -63,9 +63,9 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { // const node std::thread const_thread([&]() { std::vector const_node_list; - for (const ConstTensor &const_tensor: net_def.tensors()) { + for (const ConstTensor &const_tensor : net_def.tensors()) { std::vector tensor_shape(const_tensor.dims().begin(), - const_tensor.dims().end()); + const_tensor.dims().end()); while (tensor_shape.size() < 4) { tensor_shape.insert(tensor_shape.begin(), 1); } @@ -77,32 +77,32 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { const_node.tensor.width = tensor_shape[2]; const_node.tensor.depth = tensor_shape[3]; - if (const_tensor.data_type() == DataType::DT_INT32 - && const_tensor.data_size() == 0) { + if (const_tensor.data_type() == DataType::DT_INT32 && + const_tensor.data_size() == 0) { const_node.tensor.data = NULL; const_node.tensor.dataLen = 0; } else { const_node.tensor.data = - const_cast(const_tensor.data()); - const_node.tensor.dataLen = - const_tensor.data_size() * GetEnumTypeSize(const_tensor.data_type()); + const_cast(const_tensor.data()); + const_node.tensor.dataLen = const_tensor.data_size() * + GetEnumTypeSize(const_tensor.data_type()); } const_node_list.push_back(const_node); // 255 is magic number: why fastrpc limits sequence length to that? if (const_node_list.size() >= 250) { - MACE_CHECK(hexagon_nn_append_const_node_list(nn_id_, - const_node_list.data(), - const_node_list.size()) - == 0, "append const node error"); + MACE_CHECK( + hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(), + const_node_list.size()) == 0, + "append const node error"); const_node_list.clear(); } } if (!const_node_list.empty()) { - MACE_CHECK(hexagon_nn_append_const_node_list(nn_id_, - const_node_list.data(), - const_node_list.size()) == 0, - "append const node error"); + MACE_CHECK( + hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(), + const_node_list.size()) == 0, + "append const node error"); } const_node_list.clear(); }); @@ -117,7 +117,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { std::vector inputs; std::vector outputs; - for (const OperatorDef &op: net_def.op()) { + for (const OperatorDef &op : net_def.op()) { int op_id = op_map.GetOpId(op.type()); inputs.resize(op.node_input().size()); for (size_t i = 0; i < op.node_input().size(); ++i) { @@ -131,9 +131,8 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { cached_inputs.push_back(inputs); cached_outputs.push_back(outputs); - hexagon_nn_padding_type - padding_type = static_cast( - op.padding()); + hexagon_nn_padding_type padding_type = + static_cast(op.padding()); hexagon_nn_op_node op_node; op_node.node_id = node_id(op.node_id()); @@ -146,8 +145,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { op_node_list.push_back(op_node); if (op_node_list.size() >= 125) { - MACE_CHECK(hexagon_nn_append_node_list(nn_id_, - op_node_list.data(), + MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(), op_node_list.size()) == 0, "append node error"); op_node_list.clear(); @@ -157,8 +155,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { } if (!op_node_list.empty()) { - MACE_CHECK(hexagon_nn_append_node_list(nn_id_, - op_node_list.data(), + MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(), op_node_list.size()) == 0, "append node error"); } @@ -172,10 +169,10 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { // input info num_inputs_ = 0; - for (const InputInfo &input_info: net_def.input_info()) { + for (const InputInfo &input_info : net_def.input_info()) { std::vector input_shape; - input_shape.insert(input_shape.begin(), - input_info.dims().begin(), input_info.dims().end()); + input_shape.insert(input_shape.begin(), input_info.dims().begin(), + input_info.dims().end()); while (input_shape.size() < 4) { input_shape.insert(input_shape.begin(), 1); } @@ -186,10 +183,10 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { // output info num_outputs_ = 0; - for (const OutputInfo &output_info: net_def.output_info()) { + for (const OutputInfo &output_info : net_def.output_info()) { std::vector output_shape; - output_shape.insert(output_shape.begin(), - output_info.dims().begin(), output_info.dims().end()); + output_shape.insert(output_shape.begin(), output_info.dims().begin(), + output_info.dims().end()); while (output_shape.size() < 4) { output_shape.insert(output_shape.begin(), 1); } @@ -218,27 +215,27 @@ bool HexagonControlWrapper::TeardownGraph() { return hexagon_nn_teardown(nn_id_) == 0; } -#define PRINT_BUFSIZE (2*1024*1024) +#define PRINT_BUFSIZE (2 * 1024 * 1024) void HexagonControlWrapper::PrintLog() { char *buf; if ((buf = new char[PRINT_BUFSIZE]) == NULL) return; - MACE_CHECK(hexagon_nn_getlog(nn_id_, - reinterpret_cast(buf), - PRINT_BUFSIZE) == 0, "print log error"); + MACE_CHECK(hexagon_nn_getlog(nn_id_, reinterpret_cast(buf), + PRINT_BUFSIZE) == 0, + "print log error"); LOG(INFO) << std::string(buf); - delete[]buf; + delete[] buf; } void HexagonControlWrapper::PrintGraph() { LOG(INFO) << "Print Graph"; char *buf; if ((buf = new char[PRINT_BUFSIZE]) == NULL) return; - MACE_CHECK(hexagon_nn_snpprint(nn_id_, - reinterpret_cast(buf), - PRINT_BUFSIZE) == 0, "print graph error"); + MACE_CHECK(hexagon_nn_snpprint(nn_id_, reinterpret_cast(buf), + PRINT_BUFSIZE) == 0, + "print graph error"); LOG(INFO) << std::string(buf); - delete[]buf; + delete[] buf; } void HexagonControlWrapper::SetDebugLevel(int level) { @@ -256,9 +253,9 @@ void HexagonControlWrapper::GetPerfInfo() { LOG(INFO) << "Get perf info"; std::vector perf_info(MAX_NODE); unsigned int n_items = 0; - MACE_CHECK( - hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, &n_items) == 0, - "get perf info error"); + MACE_CHECK(hexagon_nn_get_perfinfo(nn_id_, perf_info.data(), MAX_NODE, + &n_items) == 0, + "get perf info error"); std::unordered_map node_id_counters; std::unordered_map> node_type_counters; @@ -269,8 +266,9 @@ void HexagonControlWrapper::GetPerfInfo() { unsigned int node_id = perf_info[i].node_id; unsigned int node_type_id = perf_info[i].node_type; node_id_counters[node_id] = - ((static_cast(perf_info[i].counter_hi) << 32) - + perf_info[i].counter_lo) * 1.0f / perf_info[i].executions; + ((static_cast(perf_info[i].counter_hi) << 32) + + perf_info[i].counter_lo) * + 1.0f / perf_info[i].executions; char node_type_buf[MAX_NODE]; hexagon_nn_op_id_to_name(node_type_id, node_type_buf, MAX_NODE); @@ -288,7 +286,7 @@ void HexagonControlWrapper::GetPerfInfo() { total_duration += node_id_counters[node_id]; } - for (auto &node_type_counter: node_type_counters) { + for (auto &node_type_counter : node_type_counters) { LOG(INFO) << "node type: " << node_type_counter.first << ", time: " << node_type_counter.second.first << ", duration: " << node_type_counter.second.second; @@ -312,33 +310,25 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, output_tensor->Resize(output_shapes_[0]); std::vector output_shape(4); uint32_t output_bytes; - int res = hexagon_nn_execute(nn_id_, - input_tensor.shape()[0], - input_tensor.shape()[1], - input_tensor.shape()[2], - input_tensor.shape()[3], - reinterpret_cast( - input_tensor.raw_data()), - input_tensor.raw_size(), - &output_shape[0], - &output_shape[1], - &output_shape[2], - &output_shape[3], - reinterpret_cast( - output_tensor->raw_mutable_data()), - output_tensor->raw_size(), - &output_bytes); + int res = hexagon_nn_execute( + nn_id_, input_tensor.shape()[0], input_tensor.shape()[1], + input_tensor.shape()[2], input_tensor.shape()[3], + reinterpret_cast(input_tensor.raw_data()), + input_tensor.raw_size(), &output_shape[0], &output_shape[1], + &output_shape[2], &output_shape[3], + reinterpret_cast(output_tensor->raw_mutable_data()), + output_tensor->raw_size(), &output_bytes); MACE_CHECK(res == 0, "execute error"); - MACE_ASSERT(output_shape == output_shapes_[0], - "wrong output shape inferred"); + MACE_ASSERT(output_shape == output_shapes_[0], "wrong output shape inferred"); MACE_ASSERT(output_bytes == output_tensor->raw_size(), "wrong output bytes inferred."); return res == 0; }; -bool HexagonControlWrapper::ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors) { +bool HexagonControlWrapper::ExecuteGraphNew( + const std::vector &input_tensors, + std::vector *output_tensors) { LOG(INFO) << "Execute graph new: " << nn_id_; int num_inputs = input_tensors.size(); int num_outputs = output_tensors->size(); @@ -355,7 +345,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(const std::vector &input_ten inputs[i].width = input_shape[2]; inputs[i].depth = input_shape[3]; inputs[i].data = const_cast( - reinterpret_cast(input_tensors[i].raw_data())); + reinterpret_cast(input_tensors[i].raw_data())); inputs[i].dataLen = input_tensors[i].raw_size(); inputs[i].data_valid_len = input_tensors[i].raw_size(); inputs[i].unused = 0; @@ -365,16 +355,16 @@ bool HexagonControlWrapper::ExecuteGraphNew(const std::vector &input_ten (*output_tensors)[i].SetDtype(output_data_types_[i]); (*output_tensors)[i].Resize(output_shapes_[i]); outputs[i].data = reinterpret_cast( - (*output_tensors)[i].raw_mutable_data()); + (*output_tensors)[i].raw_mutable_data()); outputs[i].dataLen = (*output_tensors)[i].raw_size(); } - int res = hexagon_nn_execute_new(nn_id_, inputs, num_inputs, - outputs, num_outputs); + int res = + hexagon_nn_execute_new(nn_id_, inputs, num_inputs, outputs, num_outputs); for (int i = 0; i < num_outputs; ++i) { std::vector output_shape{outputs[i].batches, outputs[i].height, - outputs[i].width, outputs[i].depth}; + outputs[i].width, outputs[i].depth}; MACE_ASSERT(output_shape == output_shapes_[i], "wrong output shape inferred"); MACE_ASSERT(outputs[i].data_valid_len == (*output_tensors)[i].raw_size(), @@ -397,9 +387,7 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, float *min_in_data = input_tensors[1].mutable_data(); input_tensors[2].Resize({1, 1, 1, 1}); float *max_in_data = input_tensors[2].mutable_data(); - quantizer_.Quantize(input_tensor, - &input_tensors[0], - min_in_data, + quantizer_.Quantize(input_tensor, &input_tensors[0], min_in_data, max_in_data); if (!ExecuteGraphNew(input_tensors, &output_tensors)) { return false; @@ -409,11 +397,9 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, const float *min_out_data = output_tensors[1].data(); const float *max_out_data = output_tensors[2].data(); - quantizer_.DeQuantize(output_tensors[0], - *min_out_data, - *max_out_data, + quantizer_.DeQuantize(output_tensors[0], *min_out_data, *max_out_data, output_tensor); return true; } -} // namespace mace +} // namespace mace diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index 09a1c778..8cb3b359 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -16,16 +16,17 @@ namespace mace { class HexagonControlWrapper { public: - HexagonControlWrapper() {}; + HexagonControlWrapper(){}; int GetVersion(); bool Config(); bool Init(); bool Finalize(); - bool SetupGraph(const NetDef& net_def); + bool SetupGraph(const NetDef &net_def); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); - bool ExecuteGraphNew(const std::vector& input_tensors, + bool ExecuteGraphNew(const std::vector &input_tensors, std::vector *output_tensors); - bool ExecuteGraphPreQuantize(const Tensor &input_tensor, Tensor *output_tensor); + bool ExecuteGraphPreQuantize(const Tensor &input_tensor, + Tensor *output_tensor); bool TeardownGraph(); void PrintLog(); @@ -38,9 +39,7 @@ class HexagonControlWrapper { private: static constexpr int NODE_ID_OFFSET = 10000; - inline uint32_t node_id(uint32_t nodeid) { - return NODE_ID_OFFSET + nodeid; - } + inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } int nn_id_; Quantizer quantizer_; @@ -52,9 +51,8 @@ class HexagonControlWrapper { uint32_t num_inputs_; uint32_t num_outputs_; - DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); + DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); }; - } -#endif // MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ +#endif // MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ diff --git a/mace/core/runtime/hexagon/hexagon_controller_dummy.cc b/mace/core/runtime/hexagon/hexagon_controller_dummy.cc index fdc62ede..c1fdcc42 100644 --- a/mace/core/runtime/hexagon/hexagon_controller_dummy.cc +++ b/mace/core/runtime/hexagon/hexagon_controller_dummy.cc @@ -10,31 +10,145 @@ int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, return 0; } -int hexagon_controller_DeInitHexagon() { +int hexagon_controller_DeInitHexagon() { return 0; } + +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)( + hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)( + hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, + unsigned char *buf, + int bufLen) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, + unsigned char *buf, + int bufLen) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)( + hexagon_nn_nn_id id, + unsigned int node_id, + unsigned int operation, + hexagon_nn_padding_type padding, + const hexagon_nn_input *inputs, + int inputsLen, + const hexagon_nn_output *outputs, + int outputsLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)( + hexagon_nn_nn_id id, + const hexagon_nn_op_node *ops, + int opsLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)( + hexagon_nn_nn_id id, + unsigned int node_id, + unsigned int batches, + unsigned int height, + unsigned int width, + unsigned int depth, + const unsigned char *data, + int dataLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)( + hexagon_nn_nn_id id, + const hexagon_nn_const_node *consts, + int constsLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)( + hexagon_nn_nn_id id, + unsigned int batches_in, + unsigned int height_in, + unsigned int width_in, + unsigned int depth_in, + const unsigned char *data_in, + int data_inLen, + unsigned int *batches_out, + unsigned int *height_out, + unsigned int *width_out, + unsigned int *depth_out, + unsigned char *data_out, + int data_outLen, + unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)( + unsigned int level) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)( + hexagon_nn_nn_id id, + hexagon_nn_perfinfo *info_out, + int info_outLen, + unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)( + hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)( + hexagon_nn_nn_id id, + unsigned int *cycles_lo, + unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)( + const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)( + unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) + __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)( + int *ver) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)( + const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { + return 0; +} +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)( + hexagon_nn_nn_id id, + const hexagon_nn_tensordef *inputs, + int inputsLen, + hexagon_nn_tensordef *outputs, + int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } - -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)(hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int operation, hexagon_nn_padding_type padding, const hexagon_nn_input* inputs, int inputsLen, const hexagon_nn_output* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(hexagon_nn_nn_id id, const hexagon_nn_op_node* ops, int opsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int batches, unsigned int height, unsigned int width, unsigned int depth, const unsigned char* data, int dataLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(hexagon_nn_nn_id id, const hexagon_nn_const_node* consts, int constsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id, unsigned int batches_in, unsigned int height_in, unsigned int width_in, unsigned int depth_in, const unsigned char* data_in, int data_inLen, unsigned int* batches_out, unsigned int* height_out, unsigned int* width_out, unsigned int* depth_out, unsigned char* data_out, int data_outLen, unsigned int* data_len_out) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(unsigned int level) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(hexagon_nn_nn_id id, hexagon_nn_perfinfo* info_out, int info_outLen, unsigned int* n_items) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(hexagon_nn_nn_id id, unsigned int* cycles_lo, unsigned int* cycles_hi) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int* ver) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)(const char* name, unsigned int* node_id) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)(unsigned int node_id, char* name, int nameLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)(int* ver) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)(const unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; } -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(hexagon_nn_nn_id id, const hexagon_nn_tensordef* inputs, int inputsLen, hexagon_nn_tensordef* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; } diff --git a/mace/core/runtime/hexagon/hexagon_nn.h b/mace/core/runtime/hexagon/hexagon_nn.h index 3bfd79c0..0baafd8c 100644 --- a/mace/core/runtime/hexagon/hexagon_nn.h +++ b/mace/core/runtime/hexagon/hexagon_nn.h @@ -2,27 +2,27 @@ #define _HEXAGON_NN_H #ifndef __QAIC_HEADER #define __QAIC_HEADER(ff) ff -#endif //__QAIC_HEADER +#endif //__QAIC_HEADER #ifndef __QAIC_HEADER_EXPORT #define __QAIC_HEADER_EXPORT -#endif // __QAIC_HEADER_EXPORT +#endif // __QAIC_HEADER_EXPORT #ifndef __QAIC_HEADER_ATTRIBUTE #define __QAIC_HEADER_ATTRIBUTE -#endif // __QAIC_HEADER_ATTRIBUTE +#endif // __QAIC_HEADER_ATTRIBUTE #ifndef __QAIC_IMPL #define __QAIC_IMPL(ff) ff -#endif //__QAIC_IMPL +#endif //__QAIC_IMPL #ifndef __QAIC_IMPL_EXPORT #define __QAIC_IMPL_EXPORT -#endif // __QAIC_IMPL_EXPORT +#endif // __QAIC_IMPL_EXPORT #ifndef __QAIC_IMPL_ATTRIBUTE #define __QAIC_IMPL_ATTRIBUTE -#endif // __QAIC_IMPL_ATTRIBUTE +#endif // __QAIC_IMPL_ATTRIBUTE #ifdef __cplusplus extern "C" { #endif @@ -30,92 +30,160 @@ extern "C" { #define __QAIC_STRING1_OBJECT_DEFINED__ #define __STRING1_OBJECT__ typedef struct _cstring1_s { - char* data; - int dataLen; + char *data; + int dataLen; } _cstring1_t; #endif /* __QAIC_STRING1_OBJECT_DEFINED__ */ typedef struct hexagon_nn_input hexagon_nn_input; struct hexagon_nn_input { - unsigned int src_id; - unsigned int output_idx; + unsigned int src_id; + unsigned int output_idx; }; typedef struct hexagon_nn_output hexagon_nn_output; struct hexagon_nn_output { - unsigned int max_size; - unsigned int unused; + unsigned int max_size; + unsigned int unused; }; typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo; struct hexagon_nn_perfinfo { - unsigned int node_id; - unsigned int node_type; - unsigned int executions; - unsigned int unused; - unsigned int counter_lo; - unsigned int counter_hi; + unsigned int node_id; + unsigned int node_type; + unsigned int executions; + unsigned int unused; + unsigned int counter_lo; + unsigned int counter_hi; }; typedef int hexagon_nn_nn_id; enum hexagon_nn_padding_type { - NN_PAD_NA, - NN_PAD_SAME, - NN_PAD_VALID, - NN_PAD_MIRROR_REFLECT, - NN_PAD_MIRROR_SYMMETRIC, - NN_PAD_SAME_CAFFE, - _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff + NN_PAD_NA, + NN_PAD_SAME, + NN_PAD_VALID, + NN_PAD_MIRROR_REFLECT, + NN_PAD_MIRROR_SYMMETRIC, + NN_PAD_SAME_CAFFE, + _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff }; typedef enum hexagon_nn_padding_type hexagon_nn_padding_type; typedef struct hexagon_nn_tensordef hexagon_nn_tensordef; struct hexagon_nn_tensordef { - unsigned int batches; - unsigned int height; - unsigned int width; - unsigned int depth; - unsigned char* data; - int dataLen; - unsigned int data_valid_len; - unsigned int unused; + unsigned int batches; + unsigned int height; + unsigned int width; + unsigned int depth; + unsigned char *data; + int dataLen; + unsigned int data_valid_len; + unsigned int unused; }; typedef struct hexagon_nn_op_node hexagon_nn_op_node; struct hexagon_nn_op_node { - unsigned int node_id; - unsigned int operation; - hexagon_nn_padding_type padding; - hexagon_nn_input* inputs; - int inputsLen; - hexagon_nn_output* outputs; - int outputsLen; + unsigned int node_id; + unsigned int operation; + hexagon_nn_padding_type padding; + hexagon_nn_input *inputs; + int inputsLen; + hexagon_nn_output *outputs; + int outputsLen; }; typedef struct hexagon_nn_const_node hexagon_nn_const_node; struct hexagon_nn_const_node { - unsigned int node_id; - hexagon_nn_tensordef tensor; + unsigned int node_id; + hexagon_nn_tensordef tensor; }; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)(hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int operation, hexagon_nn_padding_type padding, const hexagon_nn_input* inputs, int inputsLen, const hexagon_nn_output* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(hexagon_nn_nn_id id, const hexagon_nn_op_node* ops, int opsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int batches, unsigned int height, unsigned int width, unsigned int depth, const unsigned char* data, int dataLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(hexagon_nn_nn_id id, const hexagon_nn_const_node* consts, int constsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id, unsigned int batches_in, unsigned int height_in, unsigned int width_in, unsigned int depth_in, const unsigned char* data_in, int data_inLen, unsigned int* batches_out, unsigned int* height_out, unsigned int* width_out, unsigned int* depth_out, unsigned char* data_out, int data_outLen, unsigned int* data_len_out) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(unsigned int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(hexagon_nn_nn_id id, hexagon_nn_perfinfo* info_out, int info_outLen, unsigned int* n_items) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(hexagon_nn_nn_id id, unsigned int* cycles_lo, unsigned int* cycles_hi) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int* ver) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)(const char* name, unsigned int* node_id) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)(unsigned int node_id, char* name, int nameLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)(int* ver) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)(const unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(hexagon_nn_nn_id id, const hexagon_nn_tensordef* inputs, int inputsLen, hexagon_nn_tensordef* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)( + hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)( + hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, + unsigned char *buf, + int bufLen) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, + unsigned char *buf, + int bufLen) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)( + hexagon_nn_nn_id id, + unsigned int node_id, + unsigned int operation, + hexagon_nn_padding_type padding, + const hexagon_nn_input *inputs, + int inputsLen, + const hexagon_nn_output *outputs, + int outputsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)( + hexagon_nn_nn_id id, + const hexagon_nn_op_node *ops, + int opsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)( + hexagon_nn_nn_id id, + unsigned int node_id, + unsigned int batches, + unsigned int height, + unsigned int width, + unsigned int depth, + const unsigned char *data, + int dataLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)( + hexagon_nn_nn_id id, + const hexagon_nn_const_node *consts, + int constsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)( + hexagon_nn_nn_id id, + unsigned int batches_in, + unsigned int height_in, + unsigned int width_in, + unsigned int depth_in, + const unsigned char *data_in, + int data_inLen, + unsigned int *batches_out, + unsigned int *height_out, + unsigned int *width_out, + unsigned int *depth_out, + unsigned char *data_out, + int data_outLen, + unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)( + unsigned int level) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)( + hexagon_nn_nn_id id, + hexagon_nn_perfinfo *info_out, + int info_outLen, + unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)( + hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)( + hexagon_nn_nn_id id, + unsigned int *cycles_lo, + unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)( + const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)( + unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)( + int *ver) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)( + const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)( + hexagon_nn_nn_id id, + const hexagon_nn_tensordef *inputs, + int inputsLen, + hexagon_nn_tensordef *outputs, + int outputsLen) __QAIC_HEADER_ATTRIBUTE; #ifdef __cplusplus } #endif -#endif //_HEXAGON_NN_H +#endif //_HEXAGON_NN_H diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_nn_ops.h index dfb3b386..8704ce80 100644 --- a/mace/core/runtime/hexagon/hexagon_nn_ops.h +++ b/mace/core/runtime/hexagon/hexagon_nn_ops.h @@ -5,8 +5,8 @@ #ifndef LIBMACE_HEXAGON_NN_OPS_H #define LIBMACE_HEXAGON_NN_OPS_H -#include "mace/utils/logging.h" #include +#include "mace/utils/logging.h" namespace mace { @@ -24,8 +24,7 @@ typedef enum op_type_enum { class OpMap { public: void Init() { -#define DEF_OP(NAME) \ - op_map_[#NAME] = OP_##NAME; +#define DEF_OP(NAME) op_map_[#NAME] = OP_##NAME; #include "mace/core/runtime/hexagon/ops.h" @@ -40,9 +39,10 @@ class OpMap { return OP_INVALID; } } + private: std::unordered_map op_map_; }; -} // namespace mace +} // namespace mace -#endif // LIBMACE_HEXAGON_NN_OPS_H +#endif // LIBMACE_HEXAGON_NN_OPS_H diff --git a/mace/core/runtime/hexagon/ops.h b/mace/core/runtime/hexagon/ops.h index 79b503cd..55b40413 100644 --- a/mace/core/runtime/hexagon/ops.h +++ b/mace/core/runtime/hexagon/ops.h @@ -178,4 +178,3 @@ DEF_OP(QuantizedBiasAdd_8p8to8) #undef __SELF_DEF_OP_WREF #undef DEF_OP_WREF #endif - diff --git a/mace/core/runtime/hexagon/quantize.cc b/mace/core/runtime/hexagon/quantize.cc index 5e3aad62..c4548bcb 100644 --- a/mace/core/runtime/hexagon/quantize.cc +++ b/mace/core/runtime/hexagon/quantize.cc @@ -29,16 +29,16 @@ void Quantizer::Quantize(const Tensor &in_tensor, float *max_out) { float stepsize; float recip_stepsize; - QuantizeAdjustRange(min_in, max_in, - min_out, max_out, - &stepsize, &recip_stepsize); + QuantizeAdjustRange(min_in, max_in, min_out, max_out, &stepsize, + &recip_stepsize); const float *in = in_tensor.data(); uint8_t *out = out_tensor->mutable_data(); for (int i = 0; i < in_tensor.size(); i++) { const float inval = in[i]; - float ival = static_cast((inval - *min_out) * recip_stepsize + 0.5f); + float ival = + static_cast((inval - *min_out) * recip_stepsize + 0.5f); if (ival < 0) ival = 0; if (ival > 255) ival = 255; out[i] = static_cast(ival); @@ -93,4 +93,4 @@ void Quantizer::DeQuantize(const Tensor &in_tensor, } } -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/core/runtime/hexagon/quantize.h b/mace/core/runtime/hexagon/quantize.h index 1ec2f41f..216e0c6b 100644 --- a/mace/core/runtime/hexagon/quantize.h +++ b/mace/core/runtime/hexagon/quantize.h @@ -16,13 +16,17 @@ class Quantizer { void Quantize(const Tensor &in_tensor, Tensor *out_tensor, - float *min_out, float *max_out); + float *min_out, + float *max_out); void Quantize(const Tensor &in_tensor, - const float min_in, const float max_in, + const float min_in, + const float max_in, Tensor *out_tensor, - float *min_out, float *max_out); + float *min_out, + float *max_out); void DeQuantize(const Tensor &in_tensor, - const float min_in, const float max_in, + const float min_in, + const float max_in, Tensor *out_tensor); private: @@ -33,9 +37,9 @@ class Quantizer { float *stepsize, float *recip_stepsize); - DISABLE_COPY_AND_ASSIGN(Quantizer); + DISABLE_COPY_AND_ASSIGN(Quantizer); }; -} // mace +} // mace -#endif // MACE_DSP_UTIL_QUANTIZE_H_ +#endif // MACE_DSP_UTIL_QUANTIZE_H_ diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index abc88bdd..57aa40c2 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_allocator.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" namespace mace { @@ -29,7 +29,6 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { return 0; } } - } OpenCLAllocator::OpenCLAllocator() {} @@ -49,17 +48,16 @@ void *OpenCLAllocator::New(size_t nbytes) const { void *OpenCLAllocator::NewImage(const std::vector &image_shape, const DataType dt) const { MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; - VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1]; + VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " + << image_shape[1]; cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt)); cl_int error; cl::Image2D *cl_image = new cl::Image2D(OpenCLRuntime::Global()->context(), - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - img_format, - image_shape[0], image_shape[1], - 0, nullptr, &error); + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, + image_shape[0], image_shape[1], 0, nullptr, &error); MACE_CHECK(error == CL_SUCCESS) << error << " with image shape: [" << image_shape[0] << ", " << image_shape[1] << "]"; @@ -89,8 +87,8 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { // TODO(heliangliang) Non-blocking call cl_int error; void *mapped_ptr = - queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, - nbytes, nullptr, nullptr, &error); + queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, nbytes, nullptr, nullptr, &error); MACE_CHECK(error == CL_SUCCESS); return mapped_ptr; } @@ -106,13 +104,10 @@ void *OpenCLAllocator::MapImage(void *buffer, mapped_image_pitch->resize(2); cl_int error; - void *mapped_ptr = - OpenCLRuntime::Global()->command_queue().enqueueMapImage(*cl_image, - CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - origin, region, - mapped_image_pitch->data(), - mapped_image_pitch->data() + 1, - nullptr, nullptr, &error); + void *mapped_ptr = OpenCLRuntime::Global()->command_queue().enqueueMapImage( + *cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, + mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr, + nullptr, &error); MACE_CHECK(error == CL_SUCCESS) << error; return mapped_ptr; diff --git a/mace/core/runtime/opencl/opencl_development.cc b/mace/core/runtime/opencl/opencl_development.cc index 266eceba..71130cf4 100644 --- a/mace/core/runtime/opencl/opencl_development.cc +++ b/mace/core/runtime/opencl/opencl_development.cc @@ -5,8 +5,8 @@ #include #include "mace/core/runtime/opencl/cl2_header.h" -#include "mace/utils/utils.h" #include "mace/utils/logging.h" +#include "mace/utils/utils.h" namespace mace { @@ -16,7 +16,8 @@ bool GetSourceOrBinaryProgram(const std::string &program_name, cl::Device &device, cl::Program *program, bool *is_binary) { - extern const std::map> kEncryptedProgramMap; + extern const std::map> + kEncryptedProgramMap; *is_binary = false; auto it_source = kEncryptedProgramMap.find(program_name); if (it_source == kEncryptedProgramMap.end()) { diff --git a/mace/core/runtime/opencl/opencl_production.cc b/mace/core/runtime/opencl/opencl_production.cc index 265fcbef..78aa5bcf 100644 --- a/mace/core/runtime/opencl/opencl_production.cc +++ b/mace/core/runtime/opencl/opencl_production.cc @@ -14,7 +14,8 @@ bool GetSourceOrBinaryProgram(const std::string &program_name, cl::Device &device, cl::Program *program, bool *is_binary) { - extern const std::map> kCompiledProgramMap; + extern const std::map> + kCompiledProgramMap; *is_binary = true; auto it_binary = kCompiledProgramMap.find(binary_file_name_prefix); if (it_binary == kCompiledProgramMap.end()) { diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 36b8a837..4a18f630 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -48,11 +48,9 @@ double OpenCLProfilingTimer::ElapsedMicros() { return (stop_nanos_ - start_nanos_) / 1000.0; } -double OpenCLProfilingTimer::AccumulatedMicros() { - return accumulated_micros_; -} +double OpenCLProfilingTimer::AccumulatedMicros() { return accumulated_micros_; } -void OpenCLProfilingTimer::AccumulateTiming(){ +void OpenCLProfilingTimer::AccumulateTiming() { StopTiming(); accumulated_micros_ += (stop_nanos_ - start_nanos_) / 1000.0; } @@ -116,7 +114,8 @@ OpenCLRuntime::OpenCLRuntime() { cl::CommandQueue command_queue(context, gpu_device, properties); const char *kernel_path = getenv("MACE_KERNEL_PATH"); - this->kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; + this->kernel_path_ = + std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; this->device_ = new cl::Device(gpu_device); this->context_ = new cl::Context(context); @@ -163,18 +162,14 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, MACE_CHECK_NOTNULL(program); std::string binary_file_name_prefix = - GenerateCLBinaryFilenamePrefix(built_program_key); + GenerateCLBinaryFilenamePrefix(built_program_key); std::vector program_vec; bool is_opencl_binary; - const bool found = GetSourceOrBinaryProgram(program_name, - binary_file_name_prefix, - context(), - device(), - program, - &is_opencl_binary); + const bool found = + GetSourceOrBinaryProgram(program_name, binary_file_name_prefix, context(), + device(), program, &is_opencl_binary); MACE_CHECK(found, "Program not found for ", - is_opencl_binary ? "binary: " : "source: ", - built_program_key); + is_opencl_binary ? "binary: " : "source: ", built_program_key); // Build program std::string build_options_str = @@ -190,13 +185,13 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, } LOG(FATAL) << "Build program from " << (is_opencl_binary ? "binary: " : "source: ") - << built_program_key - << " failed: " << ret; + << built_program_key << " failed: " << ret; } if (!is_opencl_binary) { // Write binary if necessary - std::string binary_filename = kernel_path_ + binary_file_name_prefix + ".bin"; + std::string binary_filename = + kernel_path_ + binary_file_name_prefix + ".bin"; size_t device_list_size = 1; std::unique_ptr program_binary_sizes( new size_t[device_list_size]); @@ -240,8 +235,8 @@ cl::Kernel OpenCLRuntime::BuildKernel( if (built_program_it != built_program_map_.end()) { program = built_program_it->second; } else { - this->BuildProgram(program_name, built_program_key, - build_options_str, &program); + this->BuildProgram(program_name, built_program_key, build_options_str, + &program); built_program_map_.emplace(built_program_key, program); } return cl::Kernel(program, kernel_name.c_str()); @@ -250,9 +245,9 @@ cl::Kernel OpenCLRuntime::BuildKernel( void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) { if (stats != nullptr) { stats->start_micros = - event.getProfilingInfo() / 1000; + event.getProfilingInfo() / 1000; stats->end_micros = - event.getProfilingInfo() / 1000; + event.getProfilingInfo() / 1000; } } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index ff596459..1f5ab2a1 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -19,7 +19,8 @@ namespace mace { class OpenCLProfilingTimer : public Timer { public: - explicit OpenCLProfilingTimer(const cl::Event *event) : event_(event), accumulated_micros_(0) {}; + explicit OpenCLProfilingTimer(const cl::Event *event) + : event_(event), accumulated_micros_(0){}; void StartTiming() override; void StopTiming() override; void AccumulateTiming() override; @@ -48,6 +49,7 @@ class OpenCLRuntime { cl::Kernel BuildKernel(const std::string &program_name, const std::string &kernel_name, const std::set &build_options); + private: OpenCLRuntime(); ~OpenCLRuntime(); diff --git a/mace/core/runtime/opencl/opencl_wrapper.h b/mace/core/runtime/opencl/opencl_wrapper.h index c0e88186..fdf90b02 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.h +++ b/mace/core/runtime/opencl/opencl_wrapper.h @@ -7,10 +7,10 @@ namespace mace { - // These functions are not thread-safe. - void LoadOpenCLLibrary(); - void UnloadOpenCLLibrary(); - +// These functions are not thread-safe. +void LoadOpenCLLibrary(); +void UnloadOpenCLLibrary(); + } // namespace mace #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_WRAPPER_H_ diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 7adc6a58..9017dafa 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -65,23 +65,20 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) { class Tensor { public: Tensor(Allocator *alloc, DataType type) - : allocator_(alloc), - dtype_(type), - buffer_(nullptr), - is_buffer_owner_(true), - name_("") {}; + : allocator_(alloc), + dtype_(type), + buffer_(nullptr), + is_buffer_owner_(true), + name_(""){}; Tensor(BufferBase *buffer, DataType dtype) - : dtype_(dtype), - buffer_(buffer), - is_buffer_owner_(false), - name_("") {} + : dtype_(dtype), buffer_(buffer), is_buffer_owner_(false), name_("") {} Tensor(const BufferSlice &buffer_slice, DataType dtype) - : dtype_(dtype), - buffer_slice_(buffer_slice), - is_buffer_owner_(false), - name_("") { + : dtype_(dtype), + buffer_slice_(buffer_slice), + is_buffer_owner_(false), + name_("") { buffer_ = &buffer_slice_; } @@ -102,8 +99,8 @@ class Tensor { inline index_t dim_size() const { return shape_.size(); } inline index_t dim(unsigned int index) const { - MACE_CHECK(index < shape_.size(), "Dim out of range: ", - index, " >= ", shape_.size()); + MACE_CHECK(index < shape_.size(), "Dim out of range: ", index, " >= ", + shape_.size()); return shape_[index]; } @@ -112,40 +109,35 @@ class Tensor { std::multiplies()); } - inline index_t raw_size() const { - return size() * SizeOfType(); - } + inline index_t raw_size() const { return size() * SizeOfType(); } inline bool has_opencl_image() const { - return buffer_ != nullptr && !buffer_->OnHost() - && typeid(*buffer_) == typeid(Image); + return buffer_ != nullptr && !buffer_->OnHost() && + typeid(*buffer_) == typeid(Image); } inline bool has_opencl_buffer() const { - return buffer_ != nullptr && !buffer_->OnHost() - && !has_opencl_image(); + return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image(); } inline cl::Image *opencl_image() const { MACE_CHECK(has_opencl_image(), "do not have image"); - return static_cast(buffer_->buffer()); + return static_cast(buffer_->buffer()); } inline cl::Buffer *opencl_buffer() const { MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer"); - return static_cast(buffer_->buffer()); + return static_cast(buffer_->buffer()); } - inline index_t buffer_offset() const { - return buffer_->offset(); - } + inline index_t buffer_offset() const { return buffer_->offset(); } inline const void *raw_data() const { MACE_CHECK(buffer_ != nullptr, "buffer is null"); return buffer_->raw_data(); } - template + template inline const T *data() const { MACE_CHECK(buffer_ != nullptr, "buffer is null"); return buffer_->data(); @@ -156,7 +148,7 @@ class Tensor { return buffer_->raw_mutable_data(); } - template + template inline T *mutable_data() { MACE_CHECK(buffer_ != nullptr, "buffer is null"); return static_cast(buffer_->raw_mutable_data()); @@ -188,25 +180,17 @@ class Tensor { is_buffer_owner_ = true; } else { MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); - Image *image = dynamic_cast(buffer_); - MACE_CHECK(image_shape[0] <= image->image_shape()[0] - && image_shape[1] <= image->image_shape()[1], - "tensor (source op ", - name_, - "): current physical image shape: ", - image->image_shape()[0], - ", ", - image->image_shape()[1], - " < logical image shape: ", - image_shape[0], - ", ", - image_shape[1]); + Image *image = dynamic_cast(buffer_); + MACE_CHECK(image_shape[0] <= image->image_shape()[0] && + image_shape[1] <= image->image_shape()[1], + "tensor (source op ", name_, + "): current physical image shape: ", image->image_shape()[0], + ", ", image->image_shape()[1], " < logical image shape: ", + image_shape[0], ", ", image_shape[1]); } } - inline void ResizeLike(const Tensor &other) { - ResizeLike(&other); - } + inline void ResizeLike(const Tensor &other) { ResizeLike(&other); } inline void ResizeLike(const Tensor *other) { if (other->has_opencl_image()) { @@ -229,7 +213,7 @@ class Tensor { memcpy(buffer_->raw_mutable_data(), src, size); } - template + template inline void Copy(const T *src, index_t length) { MACE_CHECK(length == size(), "copy src and dst with different size."); CopyBytes(static_cast(src), sizeof(T) * length); @@ -248,13 +232,9 @@ class Tensor { return type_size; } - inline BufferBase *UnderlyingBuffer() const { - return buffer_; - } + inline BufferBase *UnderlyingBuffer() const { return buffer_; } - inline void SetSourceOpName(const std::string name) { - name_ = name; - } + inline void SetSourceOpName(const std::string name) { name_ = name; } inline void DebugPrint() const { using namespace numerical_chars; @@ -272,8 +252,9 @@ class Tensor { } CASES(dtype_, (os << (this->data()[i]) << ", ")); } - LOG(INFO) << "Tensor size: [" << dim(0) << ", " << dim(1) << ", " - << dim(2) << ", " << dim(3) << "], content:\n" << os.str(); + LOG(INFO) << "Tensor size: [" << dim(0) << ", " << dim(1) << ", " << dim(2) + << ", " << dim(3) << "], content:\n" + << os.str(); } class MappingGuard { @@ -301,20 +282,20 @@ class Tensor { const Tensor *tensor_; std::vector mapped_image_pitch_; - DISABLE_COPY_AND_ASSIGN(MappingGuard); + DISABLE_COPY_AND_ASSIGN(MappingGuard); }; private: Allocator *allocator_; DataType dtype_; std::vector shape_; - std::vector image_shape_; + std::vector image_shape_; BufferBase *buffer_; BufferSlice buffer_slice_; bool is_buffer_owner_; std::string name_; - DISABLE_COPY_AND_ASSIGN(Tensor); + DISABLE_COPY_AND_ASSIGN(Tensor); }; } // namespace tensor diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index 97848c97..7dcf2a27 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -99,9 +99,7 @@ void RestartTiming() { accum_time = 0; start_time = NowMicros(); } -void StartTiming() { - start_time = NowMicros(); -} +void StartTiming() { start_time = NowMicros(); } void StopTiming() { if (start_time != 0) { accum_time += (NowMicros() - start_time); diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h index 79030593..2e3526cf 100644 --- a/mace/core/testing/test_benchmark.h +++ b/mace/core/testing/test_benchmark.h @@ -6,9 +6,9 @@ #ifndef MACE_CORE_TESTING_TEST_BENCHMARK_H_ #define MACE_CORE_TESTING_TEST_BENCHMARK_H_ +#include #include #include -#include #define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c #define BENCHMARK(n) \ diff --git a/mace/core/types.cc b/mace/core/types.cc index e466f258..ef0a1755 100644 --- a/mace/core/types.cc +++ b/mace/core/types.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include +#include #include "mace/core/types.h" #include "mace/utils/logging.h" @@ -30,18 +30,12 @@ bool DataTypeCanUseMemcpy(DataType dt) { std::string DataTypeToString(const DataType dt) { static std::map dtype_string_map = { - {DT_FLOAT, "DT_FLOAT"}, - {DT_HALF, "DT_HALF"}, - {DT_DOUBLE, "DT_DOUBLE"}, - {DT_UINT8, "DT_UINT8"}, - {DT_INT8, "DT_INT8"}, - {DT_INT32, "DT_INT32"}, - {DT_UINT32, "DT_UINT32"}, - {DT_UINT16, "DT_UINT16"}, - {DT_INT64, "DT_INT64"}, - {DT_BOOL, "DT_BOOL"}, - {DT_STRING, "DT_STRING"} - }; + {DT_FLOAT, "DT_FLOAT"}, {DT_HALF, "DT_HALF"}, + {DT_DOUBLE, "DT_DOUBLE"}, {DT_UINT8, "DT_UINT8"}, + {DT_INT8, "DT_INT8"}, {DT_INT32, "DT_INT32"}, + {DT_UINT32, "DT_UINT32"}, {DT_UINT16, "DT_UINT16"}, + {DT_INT64, "DT_INT64"}, {DT_BOOL, "DT_BOOL"}, + {DT_STRING, "DT_STRING"}}; MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type"; return dtype_string_map[dt]; } diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 59c509e7..1cfa1802 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -5,8 +5,8 @@ #include #include -#include "mace/core/workspace.h" #include "mace/core/arg_helper.h" +#include "mace/core/workspace.h" #include "mace/utils/timer.h" namespace mace { @@ -19,7 +19,7 @@ Tensor *Workspace::CreateTensor(const std::string &name, } else { VLOG(3) << "Creating Tensor " << name; tensor_map_[name] = - std::move(std::unique_ptr(new Tensor(alloc, type))); + std::move(std::unique_ptr(new Tensor(alloc, type))); } return GetTensor(name); } @@ -35,7 +35,7 @@ const Tensor *Workspace::GetTensor(const std::string &name) const { Tensor *Workspace::GetTensor(const std::string &name) { return const_cast( - static_cast(this)->GetTensor(name)); + static_cast(this)->GetTensor(name)); } std::vector Workspace::Tensors() const { @@ -51,28 +51,28 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { index_t model_data_size = 0; unsigned char *model_data_ptr = nullptr; for (auto &const_tensor : net_def.tensors()) { - if (model_data_ptr == nullptr - || reinterpret_cast(const_tensor.data()) - < reinterpret_cast(model_data_ptr)) { + if (model_data_ptr == nullptr || + reinterpret_cast(const_tensor.data()) < + reinterpret_cast(model_data_ptr)) { model_data_ptr = const_cast(const_tensor.data()); } } for (auto &const_tensor : net_def.tensors()) { - model_data_size = std::max(model_data_size, - static_cast( - (reinterpret_cast(const_tensor.data()) - - reinterpret_cast(model_data_ptr)) - + const_tensor.data_size() - * GetEnumTypeSize(const_tensor.data_type()))); + model_data_size = std::max( + model_data_size, + static_cast((reinterpret_cast(const_tensor.data()) - + reinterpret_cast(model_data_ptr)) + + const_tensor.data_size() * + GetEnumTypeSize(const_tensor.data_type()))); } VLOG(3) << "Model data size: " << model_data_size; if (type == DeviceType::CPU) { tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); + new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); } else { tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size))); + new Buffer(GetDeviceAllocator(type), model_data_size))); tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); @@ -81,8 +81,7 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { for (auto &const_tensor : net_def.tensors()) { MACE_LATENCY_LOGGER(2, "Load tensor ", const_tensor.name()); VLOG(3) << "Tensor name: " << const_tensor.name() - << ", data type: " << const_tensor.data_type() - << ", shape: " + << ", data type: " << const_tensor.data_type() << ", shape: " << MakeString(std::vector(const_tensor.dims().begin(), const_tensor.dims().end())); std::vector dims; @@ -90,14 +89,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { dims.push_back(d); } - index_t - offset = (long long) const_tensor.data() - (long long) model_data_ptr; + index_t offset = (long long)const_tensor.data() - (long long)model_data_ptr; std::unique_ptr tensor( - new Tensor(BufferSlice(tensor_buffer_.get(), - offset, - const_tensor.data_size() - * GetEnumTypeSize(const_tensor.data_type())), - const_tensor.data_type())); + new Tensor(BufferSlice(tensor_buffer_.get(), offset, + const_tensor.data_size() * + GetEnumTypeSize(const_tensor.data_type())), + const_tensor.data_type())); tensor->Reshape(dims); tensor_map_[const_tensor.name()] = std::move(tensor); @@ -118,13 +115,11 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { // as GPU have consistent data type for each layer for now. // As DSP may have different data output type for each op, // we stick to the same concept. - for (auto &op: net_def.op()) { + for (auto &op : net_def.op()) { if (op.has_mem_id()) { const DataType op_dtype = static_cast( - ArgumentHelper::GetSingleArgument( - op, - "T", - static_cast(DT_FLOAT))); + ArgumentHelper::GetSingleArgument( + op, "T", static_cast(DT_FLOAT))); if (op_dtype != DataType::DT_INVALID) { dtype = op_dtype; // find first valid data type, break @@ -133,22 +128,24 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { } } MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid."); - for (auto &mem_block: net_def.mem_arena().mem_block()) { - std::unique_ptr - image_buf(new Image({mem_block.x(), mem_block.y()}, dtype)); + for (auto &mem_block : net_def.mem_arena().mem_block()) { + std::unique_ptr image_buf( + new Image({mem_block.x(), mem_block.y()}, dtype)); preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf)); } VLOG(3) << "Preallocate image to tensors"; - for (auto &op: net_def.op()) { + for (auto &op : net_def.op()) { if (op.has_mem_id()) { - std::unique_ptr tensor - (new Tensor(preallocated_allocator_.GetBuffer(op.mem_id()), dtype)); + std::unique_ptr tensor( + new Tensor(preallocated_allocator_.GetBuffer(op.mem_id()), dtype)); tensor->SetSourceOpName(op.name()); - VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")" << "; Mem: " - << op.mem_id() << "; Image shape: " - << dynamic_cast(tensor->UnderlyingBuffer())->image_shape()[0] - << ", " - << dynamic_cast(tensor->UnderlyingBuffer())->image_shape()[1]; + VLOG(3) + << "Tensor: " << op.name() << "(" << op.type() << ")" + << "; Mem: " << op.mem_id() << "; Image shape: " + << dynamic_cast(tensor->UnderlyingBuffer())->image_shape()[0] + << ", " + << dynamic_cast(tensor->UnderlyingBuffer()) + ->image_shape()[1]; tensor_map_[op.output(0)] = std::move(tensor); } } diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 84274914..5e990d82 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -5,9 +5,9 @@ #ifndef MACE_CORE_WORKSPACE_H_ #define MACE_CORE_WORKSPACE_H_ +#include "mace/core/preallocated_pooled_allocator.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" -#include "mace/core/preallocated_pooled_allocator.h" namespace mace { @@ -43,7 +43,7 @@ class Workspace { PreallocatedPooledAllocator preallocated_allocator_; - DISABLE_COPY_AND_ASSIGN(Workspace); + DISABLE_COPY_AND_ASSIGN(Workspace); }; } // namespace mace diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index 72e52b67..d6689e70 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -6,9 +6,9 @@ #define MACE_KERNELS_ACTIVATION_H_ #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/runtime/opencl/cl2_header.h" namespace mace { namespace kernels { @@ -99,17 +99,15 @@ void PReLUActivation(const T *input_ptr, output_ptr[i] = in; } } - } template class ActivationFunctor { public: ActivationFunctor(ActivationType type, T relux_max_limit) - : activation_(type), - relux_max_limit_(relux_max_limit){} + : activation_(type), relux_max_limit_(relux_max_limit) {} - void operator()(const Tensor *input, + void operator()(const Tensor *input, const Tensor *alpha, Tensor *output, StatsFuture *future) { @@ -118,9 +116,11 @@ class ActivationFunctor { if (activation_ == PRELU) { MACE_CHECK_NOTNULL(alpha); const T *alpha_ptr = alpha->data(); - PReLUActivation(input_ptr, output->size(), input->dim(3), alpha_ptr, output_ptr); + PReLUActivation(input_ptr, output->size(), input->dim(3), alpha_ptr, + output_ptr); } else { - DoActivation(input_ptr, output_ptr, output->size(), activation_, relux_max_limit_); + DoActivation(input_ptr, output_ptr, output->size(), activation_, + relux_max_limit_); } } @@ -131,14 +131,16 @@ class ActivationFunctor { template <> void ActivationFunctor::operator()( - const Tensor *input, const Tensor *alpha, Tensor *output, StatsFuture *future); + const Tensor *input, + const Tensor *alpha, + Tensor *output, + StatsFuture *future); template class ActivationFunctor { public: ActivationFunctor(ActivationType type, T relux_max_limit) - : activation_(type), - relux_max_limit_(relux_max_limit){} + : activation_(type), relux_max_limit_(relux_max_limit) {} void operator()(const Tensor *input, const Tensor *alpha, diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index e772d880..6e9ba2d4 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -18,7 +18,7 @@ namespace mace { namespace kernels { namespace { - constexpr int kCostPerGroup = 1024; +constexpr int kCostPerGroup = 1024; } // namespace template diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index bceee6ff..1e6a12bf 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -10,10 +10,10 @@ #endif #include "mace/core/future.h" -#include "mace/public/mace.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/activation.h" +#include "mace/public/mace.h" namespace mace { namespace kernels { @@ -24,7 +24,7 @@ struct BatchNormFunctorBase { const float relux_max_limit) : folded_constant_(folded_constant), activation_(activation), - relux_max_limit_(relux_max_limit){} + relux_max_limit_(relux_max_limit) {} const bool folded_constant_; const ActivationType activation_; @@ -36,8 +36,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { BatchNormFunctor(const bool folded_constant, const ActivationType activation, const float relux_max_limit) - : BatchNormFunctorBase( - folded_constant, activation, relux_max_limit) {} + : BatchNormFunctorBase(folded_constant, activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *scale, @@ -147,8 +146,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { BatchNormFunctor(const bool folded_constant, const ActivationType activation, const float relux_max_limit) - : BatchNormFunctorBase( - folded_constant, activation, relux_max_limit) {} + : BatchNormFunctorBase(folded_constant, activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *scale, const Tensor *offset, diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index bea5a790..28adcf8d 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -6,9 +6,9 @@ #define MACE_KERNELS_BIAS_ADD_H_ #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" -#include "mace/core/runtime/opencl/cl2_header.h" namespace mace { namespace kernels { @@ -32,7 +32,6 @@ struct BiasAddFunctor { const T *bias_ptr = bias->data(); T *output_ptr = output->mutable_data(); - #pragma omp parallel for collapse(4) for (index_t n = 0; n < batch; ++n) { for (index_t h = 0; h < height; ++h) { @@ -44,7 +43,6 @@ struct BiasAddFunctor { } } } - } }; diff --git a/mace/kernels/buffer_to_image.h b/mace/kernels/buffer_to_image.h index a3ef4cb2..3292e993 100644 --- a/mace/kernels/buffer_to_image.h +++ b/mace/kernels/buffer_to_image.h @@ -17,10 +17,9 @@ struct BufferToImageFunctorBase { bool i2b_; }; -template -struct BufferToImageFunctor : BufferToImageFunctorBase{ - BufferToImageFunctor(bool i2b = false) : - BufferToImageFunctorBase(i2b) {} +template +struct BufferToImageFunctor : BufferToImageFunctorBase { + BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, @@ -29,10 +28,9 @@ struct BufferToImageFunctor : BufferToImageFunctorBase{ } }; -template -struct BufferToImageFunctor : BufferToImageFunctorBase{ - BufferToImageFunctor(bool i2b = false) : - BufferToImageFunctorBase(i2b) {} +template +struct BufferToImageFunctor : BufferToImageFunctorBase { + BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index c4f48a2f..642a93b9 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -16,8 +16,10 @@ class ChannelShuffleFunctor { public: ChannelShuffleFunctor(const int group) : group_(group) {} - void operator()(const T *input, const index_t *input_shape, - T *output, StatsFuture *future) { + void operator()(const T *input, + const index_t *input_shape, + T *output, + StatsFuture *future) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 50171db7..021b0f61 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -6,23 +6,23 @@ #define MACE_KERNELS_CONCAT_H_ #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" #include "mace/core/types.h" #include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/runtime/opencl/cl2_header.h" namespace mace { namespace kernels { struct ConcatFunctorBase { - ConcatFunctorBase(const int32_t axis): axis_(axis){} + ConcatFunctorBase(const int32_t axis) : axis_(axis) {} int32_t axis_; }; -template +template struct ConcatFunctor : ConcatFunctorBase { - ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){} + ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, Tensor *output, @@ -75,14 +75,14 @@ struct ConcatFunctor : ConcatFunctorBase { } }; -template -struct ConcatFunctor : ConcatFunctorBase{ - ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){} +template +struct ConcatFunctor : ConcatFunctorBase { + ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, - Tensor *output, StatsFuture *future); + Tensor *output, + StatsFuture *future); cl::Kernel kernel_; - }; } // namepsace kernels diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index f4f49565..99a2eaa3 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -116,9 +116,8 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start sum[sum_idx] += vaddvq_f32(tmp); #else for (int inci = 0; inci < inc_tile_size; ++inci) { - sum[sum_idx] += - in[in_idx * inc_tile_size + inci] * - weights[weights_idx * inc_tile_size + inci]; + sum[sum_idx] += in[in_idx * inc_tile_size + inci] * + weights[weights_idx * inc_tile_size + inci]; } #endif } @@ -188,7 +187,7 @@ struct Conv2dFunctorBase { paddings_(paddings), dilations_(dilations), activation_(activation), - relux_max_limit_(relux_max_limit){} + relux_max_limit_(relux_max_limit) {} const int *strides_; // [stride_h, stride_w] const Padding padding_type_; @@ -230,8 +229,9 @@ struct Conv2dFunctor : Conv2dFunctorBase { padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), filter->shape().data(), paddings_.data(), - dilations_, strides_, RoundType::FLOOR, output_shape.data()); + CalcOutputSize(input->shape().data(), filter->shape().data(), + paddings_.data(), dilations_, strides_, RoundType::FLOOR, + output_shape.data()); } output->Resize(output_shape); diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index 9b7160a7..b1a83782 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -145,7 +145,7 @@ void CalcOutputSize(const index_t *input_shape, // NHWC MACE_CHECK(dilations[0] > 0 && dilations[1] > 0, "Invalid dilations, must >= 1"); MACE_CHECK((dilations[0] == 1 || strides[0] == 1) && - (dilations[1] == 1 || strides[1] == 1), + (dilations[1] == 1 || strides[1] == 1), "If dilations > 1, strides should be 1"); MACE_CHECK_NOTNULL(output_shape); MACE_CHECK_NOTNULL(padding_size); @@ -159,18 +159,29 @@ void CalcOutputSize(const index_t *input_shape, // NHWC */ output_shape[0] = input_shape[0]; if (round_type == FLOOR) { - output_shape[1] = static_cast(std::floor(1.0 * (input_shape[1] + padding_size[0] - - filter_shape[0] - (filter_shape[0] - 1) * (dilations[0] - 1)) / strides[0]) + 1); - output_shape[2] = static_cast(std::floor(1.0 * (input_shape[2] + padding_size[1] - - filter_shape[1] - (filter_shape[1] - 1) * (dilations[1] - 1)) / strides[1]) + 1); + output_shape[1] = static_cast( + std::floor(1.0 * (input_shape[1] + padding_size[0] - filter_shape[0] - + (filter_shape[0] - 1) * (dilations[0] - 1)) / + strides[0]) + + 1); + output_shape[2] = static_cast( + std::floor(1.0 * (input_shape[2] + padding_size[1] - filter_shape[1] - + (filter_shape[1] - 1) * (dilations[1] - 1)) / + strides[1]) + + 1); } else { - output_shape[1] = static_cast(std::ceil(1.0 * (input_shape[1] + padding_size[0] - - filter_shape[0] - (filter_shape[0] - 1) * (dilations[0] - 1)) / strides[0]) + 1); - output_shape[2] = static_cast(std::ceil(1.0 * (input_shape[2] + padding_size[1] - - filter_shape[1] - (filter_shape[1] - 1) * (dilations[1] - 1)) / strides[1]) + 1); + output_shape[1] = static_cast( + std::ceil(1.0 * (input_shape[1] + padding_size[0] - filter_shape[0] - + (filter_shape[0] - 1) * (dilations[0] - 1)) / + strides[0]) + + 1); + output_shape[2] = static_cast( + std::ceil(1.0 * (input_shape[2] + padding_size[1] - filter_shape[1] - + (filter_shape[1] - 1) * (dilations[1] - 1)) / + strides[1]) + + 1); } output_shape[3] = filter_shape[2]; - } void CalPaddingSize(const index_t *input_shape, // NCHW diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/kernels/conv_pool_2d_util.h index 24097e81..45b1d8a4 100644 --- a/mace/kernels/conv_pool_2d_util.h +++ b/mace/kernels/conv_pool_2d_util.h @@ -15,7 +15,7 @@ enum Padding { FULL = 2, // Pads with one less than the filter size on both sides }; -enum RoundType{ +enum RoundType { FLOOR = 0, CEIL = 1, }; diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index da4d00be..c0a1719f 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -10,9 +10,9 @@ #endif #include "mace/core/future.h" -#include "mace/public/mace.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/kernels/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace kernels { @@ -247,7 +247,7 @@ struct DepthwiseConv2dFunctorBase { paddings_(paddings), dilations_(dilations), activation_(activation), - relux_max_limit_(relux_max_limit){} + relux_max_limit_(relux_max_limit) {} const int *strides_; // [stride_h, stride_w] const Padding padding_type_; @@ -296,8 +296,9 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase { padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), fake_filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::FLOOR, output_shape.data()); + CalcOutputSize(input->shape().data(), fake_filter_shape.data(), + paddings_.data(), dilations_, strides_, RoundType::FLOOR, + output_shape.data()); } auto input_shape = fake_filter_shape; output->Resize(output_shape); diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 18f0604c..263dfb80 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -5,13 +5,13 @@ #define MACE_KERNELS_ELTWISE_H_ #include "mace/core/future.h" -#include "mace/core/tensor.h" #include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" namespace mace { namespace kernels { -enum EltwiseType{ +enum EltwiseType { PROD = 0, SUM = 1, MAX = 2, @@ -19,8 +19,7 @@ enum EltwiseType{ }; struct EltwiseFunctorBase { - EltwiseFunctorBase(const EltwiseType type, - const std::vector &coeff) + EltwiseFunctorBase(const EltwiseType type, const std::vector &coeff) : type_(type), coeff_(coeff) {} EltwiseType type_; @@ -29,8 +28,7 @@ struct EltwiseFunctorBase { template struct EltwiseFunctor : EltwiseFunctorBase { - EltwiseFunctor(const EltwiseType type, - const std::vector &coeff) + EltwiseFunctor(const EltwiseType type, const std::vector &coeff) : EltwiseFunctorBase(type, coeff) {} void operator()(const Tensor *input0, @@ -49,7 +47,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { switch (type_) { case PROD: #pragma omp parallel for - for(index_t i = 0; i < size; ++i) { + for (index_t i = 0; i < size; ++i) { output_ptr[i] = input0_ptr[i] * input1_ptr[i]; } break; @@ -62,19 +60,20 @@ struct EltwiseFunctor : EltwiseFunctorBase { } else { #pragma omp parallel for for (index_t i = 0; i < size; ++i) { - output_ptr[i] = coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i]; + output_ptr[i] = + coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i]; } } break; case MAX: #pragma omp parallel for - for(index_t i = 0; i < size; ++i) { + for (index_t i = 0; i < size; ++i) { output_ptr[i] = std::max(input0_ptr[i], input1_ptr[i]); } break; case MIN: #pragma omp parallel for - for(index_t i = 0; i < size; ++i) { + for (index_t i = 0; i < size; ++i) { output_ptr[i] = std::min(input0_ptr[i], input1_ptr[i]); } break; @@ -84,11 +83,9 @@ struct EltwiseFunctor : EltwiseFunctorBase { } }; - template -struct EltwiseFunctor: EltwiseFunctorBase { - EltwiseFunctor(const EltwiseType type, - const std::vector &coeff) +struct EltwiseFunctor : EltwiseFunctorBase { + EltwiseFunctor(const EltwiseType type, const std::vector &coeff) : EltwiseFunctorBase(type, coeff) {} void operator()(const Tensor *input0, diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index 031717f1..740faacc 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -6,8 +6,8 @@ #define MACE_KERNELS_FULLY_CONNECTED_H_ #include "mace/core/future.h" -#include "mace/core/tensor.h" #include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" #include "mace/kernels/activation.h" namespace mace { @@ -16,25 +16,23 @@ namespace kernels { struct FullyConnectedBase { FullyConnectedBase(const ActivationType activation, const float relux_max_limit) - : activation_(activation), - relux_max_limit_(relux_max_limit){} + : activation_(activation), relux_max_limit_(relux_max_limit) {} const ActivationType activation_; const float relux_max_limit_; }; -template +template struct FullyConnectedFunctor : FullyConnectedBase { FullyConnectedFunctor(const ActivationType activation, - const float relux_max_limit) : - FullyConnectedBase(activation, relux_max_limit) {} + const float relux_max_limit) + : FullyConnectedBase(activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *weight, const Tensor *bias, Tensor *output, StatsFuture *future) { - std::vector output_shape = {input->dim(0), 1, 1, weight->dim(0)}; output->Resize(output_shape); const index_t N = output->dim(0); @@ -70,11 +68,11 @@ struct FullyConnectedFunctor : FullyConnectedBase { } }; -template +template struct FullyConnectedFunctor : FullyConnectedBase { FullyConnectedFunctor(const ActivationType activation, - const float relux_max_limit) : - FullyConnectedBase(activation, relux_max_limit) {} + const float relux_max_limit) + : FullyConnectedBase(activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *weight, diff --git a/mace/kernels/global_avg_pooling.h b/mace/kernels/global_avg_pooling.h index 8b718e57..cd971558 100644 --- a/mace/kernels/global_avg_pooling.h +++ b/mace/kernels/global_avg_pooling.h @@ -39,8 +39,10 @@ struct GlobalAvgPoolingFunctor { template <> void GlobalAvgPoolingFunctor::operator()( - const float *input, const index_t *input_shape, - float *output, StatsFuture *future); + const float *input, + const index_t *input_shape, + float *output, + StatsFuture *future); } // namespace kernels } // namespace mace diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index 5ed6e77e..d893e951 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -6,20 +6,18 @@ #define MACE_KERNELS_MATMUL_H_ #include "mace/core/future.h" -#include "mace/core/tensor.h" #include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" namespace mace { namespace kernels { - template struct MatMulFunctor { void operator()(const Tensor *A, const Tensor *B, Tensor *C, StatsFuture *future) { - std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; C->Resize(c_shape); const index_t N = C->dim(0); @@ -52,7 +50,6 @@ struct MatMulFunctor { } }; - template struct MatMulFunctor { void operator()(const Tensor *A, diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 84dc4408..19094ef7 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -52,7 +52,8 @@ void BatchNormFunctor::operator()( #pragma omp parallel for collapse(2) for (index_t i = 0; i < n; ++i) { for (index_t j = 0; j < sample_size; ++j) { - const float *input_sample_ptr = input_ptr + (i * sample_size + j) * channel; + const float *input_sample_ptr = + input_ptr + (i * sample_size + j) * channel; float *output_sample_ptr = output_ptr + (i * sample_size + j) * channel; const float *new_scale_ptr = new_scale.data(); const float *new_offset_ptr = new_offset.data(); diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index 566abd75..8b937ddf 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -50,12 +50,11 @@ void Conv2dFunctor::operator()(const Tensor *input, MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); - std::vector output_shape_vec(4); std::vector paddings(2); kernels::CalcPaddingAndOutputSize( - input->shape().data(), filter->shape().data(), dilations_, - strides_, paddings_, output_shape_vec.data(), paddings.data()); + input->shape().data(), filter->shape().data(), dilations_, strides_, + paddings_, output_shape_vec.data(), paddings.data()); output->Resize(output_shape_vec); typedef void (*Conv2dNeonFunction)( @@ -102,8 +101,8 @@ void Conv2dFunctor::operator()(const Tensor *input, auto output_shape = output->shape().data(); auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_neon_func(input_data, input_shape, filter_data, nullptr, - bias_data, output_data, output_shape); + conv2d_neon_func(input_data, input_shape, filter_data, nullptr, bias_data, + output_data, output_shape); } } // namespace kernels diff --git a/mace/kernels/neon/conv_2d_neon_3x3.cc b/mace/kernels/neon/conv_2d_neon_3x3.cc index 6a2aa2ea..af1e83cb 100644 --- a/mace/kernels/neon/conv_2d_neon_3x3.cc +++ b/mace/kernels/neon/conv_2d_neon_3x3.cc @@ -27,10 +27,8 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW int input_channels = input_shape[1]; int input_height = input_shape[2]; int input_width = input_shape[3]; - int multiplier = - filter_shape == nullptr ? 0 : filter_shape[0]; - int filter_in_channels = - filter_shape == nullptr ? input_channels : 1; + int multiplier = filter_shape == nullptr ? 0 : filter_shape[0]; + int filter_in_channels = filter_shape == nullptr ? input_channels : 1; #pragma omp parallel for collapse(2) for (int b = 0; b < output_batch; ++b) { for (int oc = 0; oc < output_channels; ++oc) { @@ -230,10 +228,8 @@ void Conv2dNeonK3x3S2(const float *input, // NCHW int input_channels = input_shape[1]; int input_height = input_shape[2]; int input_width = input_shape[3]; - int multiplier = - filter_shape == nullptr ? 0 : filter_shape[0]; - int filter_in_channels = - filter_shape == nullptr ? input_channels : 1; + int multiplier = filter_shape == nullptr ? 0 : filter_shape[0]; + int filter_in_channels = filter_shape == nullptr ? input_channels : 1; #pragma omp parallel for collapse(2) for (int b = 0; b < output_batch; ++b) { diff --git a/mace/kernels/neon/depthwise_conv_neon.cc b/mace/kernels/neon/depthwise_conv_neon.cc index 7fa1a9f5..fc8f457a 100644 --- a/mace/kernels/neon/depthwise_conv_neon.cc +++ b/mace/kernels/neon/depthwise_conv_neon.cc @@ -52,9 +52,8 @@ void DepthwiseConv2dFunctor::operator()( << "filter" << kernel_h << "x" << kernel_w << "," << " stride " << strides_[0] << "x" << strides_[1] << " is not implemented yet, using slow version"; - DepthwiseConv2dFunctor(strides_, paddings_, - dilations_)( - input, filter, bias, output, future); + DepthwiseConv2dFunctor( + strides_, paddings_, dilations_)(input, filter, bias, output, future); return; } @@ -73,8 +72,8 @@ void DepthwiseConv2dFunctor::operator()( input_shape = padded_input.shape().data(); } auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_neon_func(input_ptr, input_shape, filter_ptr, filter_shape, bias_ptr, output_ptr, - output_shape); + conv2d_neon_func(input_ptr, input_shape, filter_ptr, filter_shape, bias_ptr, + output_ptr, output_shape); } } // namespace kernels diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc index 75922a9e..180e38ca 100644 --- a/mace/kernels/opencl/activation_opencl.cc +++ b/mace/kernels/opencl/activation_opencl.cc @@ -57,8 +57,7 @@ void ActivationFunctor::operator()(const Tensor *input, default: LOG(FATAL) << "Unknown activation type: " << activation_; } - kernel_ = - runtime->BuildKernel("activation", kernel_name, built_options); + kernel_ = runtime->BuildKernel("activation", kernel_name, built_options); int idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); if (activation_ == PRELU) { @@ -74,8 +73,8 @@ void ActivationFunctor::operator()(const Tensor *input, static_cast(height * batch)}; const std::vector lws = {8, 16, 8, 1}; std::string tuning_key = - Concat(tuning_key_prefix_, output->dim(0), output->dim(1), - output->dim(2), output->dim(3)); + Concat(tuning_key_prefix_, output->dim(0), output->dim(1), output->dim(2), + output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); } diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 38388081..a6863a59 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -5,8 +5,8 @@ #include "mace/kernels/addn.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { @@ -57,31 +57,23 @@ void AddNFunctor::operator()( uint32_t idx = 0; for (auto input : input_tensors) { - kernel_.setArg(idx++, - *(input->opencl_image())); + kernel_.setArg(idx++, *(input->opencl_image())); } kernel_.setArg(idx++, *(output_tensor->opencl_image())); } - const uint32_t gws[2] = { - static_cast(width_pixels), - static_cast(batch_height_pixels) - }; + const uint32_t gws[2] = {static_cast(width_pixels), + static_cast(batch_height_pixels)}; const std::vector lws = {64, 16, 1}; std::stringstream ss; - ss << "addn_opencl_kernel_" - << output_shape[0] << "_" - << output_shape[1] << "_" - << output_shape[2] << "_" - << output_shape[3]; + ss << "addn_opencl_kernel_" << output_shape[0] << "_" << output_shape[1] + << "_" << output_shape[2] << "_" << output_shape[3]; TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); }; -template -struct AddNFunctor; +template struct AddNFunctor; -template -struct AddNFunctor; +template struct AddNFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index 571bdd53..8f14f34b 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -60,17 +60,14 @@ void BatchNormFunctor::operator()(const Tensor *input, LOG(FATAL) << "Unknown activation type: " << activation_; } - kernel_ = - runtime->BuildKernel("batch_norm", kernel_name, built_options); + kernel_ = runtime->BuildKernel("batch_norm", kernel_name, built_options); uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, *(scale->opencl_image())); - kernel_.setArg(idx++, - *(offset->opencl_image())); + kernel_.setArg(idx++, *(offset->opencl_image())); if (!folded_constant_) { - kernel_.setArg(idx++, - *(mean->opencl_image())); + kernel_.setArg(idx++, *(mean->opencl_image())); kernel_.setArg(idx++, *(var->opencl_image())); kernel_.setArg(idx++, epsilon); } diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index c8507433..613b633b 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -12,11 +12,10 @@ namespace mace { namespace kernels { template -void BiasAddFunctor::operator()( - const Tensor *input, - const Tensor *bias, - Tensor *output, - StatsFuture *future) { +void BiasAddFunctor::operator()(const Tensor *input, + const Tensor *bias, + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -47,10 +46,8 @@ void BiasAddFunctor::operator()( cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( - kernel_, cl::NullRange, - cl::NDRange(gws[0], gws[1], gws[2]), - cl::NDRange(lws[0], lws[1], lws[2]), - nullptr, &event); + kernel_, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), + cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS); if (future != nullptr) { future->wait_fn = [runtime, event](CallStats *stats) { @@ -62,9 +59,7 @@ void BiasAddFunctor::operator()( } } -template -struct BiasAddFunctor; -template -struct BiasAddFunctor; +template struct BiasAddFunctor; +template struct BiasAddFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 19be430f..7b484464 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -9,36 +9,33 @@ namespace mace { namespace kernels { -template -void BufferToImageFunctor::operator()(Tensor *buffer, - const BufferType type, - Tensor *image, - StatsFuture *future) { +template +void BufferToImageFunctor::operator()( + Tensor *buffer, const BufferType type, Tensor *image, StatsFuture *future) { std::vector image_shape; if (!i2b_) { CalImage2DShape(buffer->shape(), type, image_shape); - if(type == WINOGRAD_FILTER) { - std::vector new_shape = - CalWinogradShape(buffer->shape(), type); + if (type == WINOGRAD_FILTER) { + std::vector new_shape = CalWinogradShape(buffer->shape(), type); image->ResizeImage(new_shape, image_shape); } else { image->ResizeImage(buffer->shape(), image_shape); } } else { - Image *image_buf = dynamic_cast(image->UnderlyingBuffer()); + Image *image_buf = dynamic_cast(image->UnderlyingBuffer()); image_shape = image_buf->image_shape(); buffer->Resize(image->shape()); } - size_t gws[2] = {image_shape[0], - image_shape[1]}; + size_t gws[2] = {image_shape[0], image_shape[1]}; std::string kernel_name; switch (type) { case CONV2D_FILTER: kernel_name = i2b_ ? "filter_image_to_buffer" : "filter_buffer_to_image"; break; case DW_CONV2D_FILTER: - kernel_name = i2b_ ? "dw_filter_image_to_buffer" : "dw_filter_buffer_to_image"; + kernel_name = + i2b_ ? "dw_filter_image_to_buffer" : "dw_filter_buffer_to_image"; break; case IN_OUT_CHANNEL: kernel_name = i2b_ ? "in_out_image_to_buffer" : "in_out_buffer_to_image"; @@ -48,7 +45,8 @@ void BufferToImageFunctor::operator()(Tensor *buffer, break; case IN_OUT_HEIGHT: case WEIGHT_HEIGHT: - kernel_name = i2b_ ? "in_out_height_image_to_buffer" : "in_out_height_buffer_to_image"; + kernel_name = i2b_ ? "in_out_height_image_to_buffer" + : "in_out_height_buffer_to_image"; break; case IN_OUT_WIDTH: MACE_CHECK(!i2b_) << "IN_OUT_WIDTH only support buffer to image now"; @@ -56,7 +54,8 @@ void BufferToImageFunctor::operator()(Tensor *buffer, break; case WINOGRAD_FILTER: gws[1] /= 16; - kernel_name = i2b_ ? "winograd_filter_image_to_buffer" : "winograd_filter_buffer_to_image"; + kernel_name = i2b_ ? "winograd_filter_image_to_buffer" + : "winograd_filter_buffer_to_image"; break; } std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name); @@ -66,25 +65,30 @@ void BufferToImageFunctor::operator()(Tensor *buffer, built_options.emplace(kernel_name_ss.str()); if (buffer->dtype() == image->dtype()) { built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToCLCMDDt(DataTypeToEnum::value)); } else { - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-DDATA_TYPE=" + + DtToUpstreamCLDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); } auto runtime = OpenCLRuntime::Global(); auto b2f_kernel = runtime->BuildKernel("buffer_to_image", - obfuscated_kernel_name, - built_options); + obfuscated_kernel_name, built_options); uint32_t idx = 0; b2f_kernel.setArg(idx++, *(buffer->opencl_buffer())); if (!i2b_) { - MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned"); - b2f_kernel.setArg(idx++, static_cast(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype()))); + MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, + "buffer offset not aligned"); + b2f_kernel.setArg(idx++, + static_cast(buffer->buffer_offset() / + GetEnumTypeSize(buffer->dtype()))); } if (type == ARGUMENT) { b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); - } else if(type == WEIGHT_HEIGHT) { + } else if (type == WEIGHT_HEIGHT) { b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(1))); b2f_kernel.setArg(idx++, 1); @@ -97,10 +101,8 @@ void BufferToImageFunctor::operator()(Tensor *buffer, const std::vector lws = {16, 64}; cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( - b2f_kernel, cl::NullRange, - cl::NDRange(gws[0], gws[1]), - cl::NDRange(lws[0], lws[1]), - nullptr, &event); + b2f_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1]), + cl::NDRange(lws[0], lws[1]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; if (future != nullptr) { diff --git a/mace/kernels/opencl/cl/common.h b/mace/kernels/opencl/cl/common.h index 28b9addd..ac870bd3 100644 --- a/mace/kernels/opencl/cl/common.h +++ b/mace/kernels/opencl/cl/common.h @@ -18,8 +18,8 @@ #define READ_IMAGET CMD_TYPE(read_image, CMD_DATA_TYPE) #define WRITE_IMAGET CMD_TYPE(write_image, CMD_DATA_TYPE) -__constant sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; - +__constant sampler_t SAMPLER = + CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; inline DATA_TYPE4 do_activation(DATA_TYPE4 in, #ifdef USE_PRELU diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 48466e6a..9cd508bd 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -5,8 +5,8 @@ #include "mace/kernels/concat.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { @@ -42,24 +42,23 @@ static void Concat2(cl::Kernel *kernel, *kernel = runtime->BuildKernel("concat", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, *(static_cast(input0->opencl_image()))); - kernel->setArg(idx++, *(static_cast(input1->opencl_image()))); + kernel->setArg(idx++, + *(static_cast(input0->opencl_image()))); + kernel->setArg(idx++, + *(static_cast(input1->opencl_image()))); kernel->setArg(idx++, static_cast(input0->dim(3))); - kernel->setArg(idx++, *(static_cast(output->opencl_image()))); + kernel->setArg(idx++, + *(static_cast(output->opencl_image()))); } const uint32_t gws[3] = { - static_cast(channel_blk), - static_cast(width), + static_cast(channel_blk), static_cast(width), static_cast(batch * height), }; const std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << "concat_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "concat_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) + << "_" << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(*kernel, ss.str(), gws, lws, future); } @@ -97,27 +96,25 @@ static void ConcatN(cl::Kernel *kernel, index_t input_channel_blk = input->dim(3) / 4; chan_blk_offset += input_channel_blk; const uint32_t gws[3] = { - static_cast(input_channel_blk), - static_cast(width), + static_cast(input_channel_blk), static_cast(width), static_cast(batch * height), }; const std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << "concat_n_opencl_kernel_" - << input_channel_blk << "_" - << width << "_" + ss << "concat_n_opencl_kernel_" << input_channel_blk << "_" << width << "_" << batch * height; TuningOrRun3DKernel(*kernel, ss.str(), gws, lws, future); } } -template -void ConcatFunctor::operator()(const std::vector &input_list, - Tensor *output, - StatsFuture *future) { +template +void ConcatFunctor::operator()( + const std::vector &input_list, + Tensor *output, + StatsFuture *future) { const int inputs_count = input_list.size(); MACE_CHECK(inputs_count >= 2 && axis_ == 3) - << "Concat opencl kernel only support >=2 elements with axis == 3"; + << "Concat opencl kernel only support >=2 elements with axis == 3"; const Tensor *input0 = input_list[0]; bool divisible_four = input0->dim(axis_) % 4 == 0; @@ -137,8 +134,9 @@ void ConcatFunctor::operator()(const std::vectordim(axis_); } - MACE_CHECK(inputs_count == 2 || divisible_four, - "Dimensions of inputs should be divisible by 4 when inputs_count > 2."); + MACE_CHECK( + inputs_count == 2 || divisible_four, + "Dimensions of inputs should be divisible by 4 when inputs_count > 2."); std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); output->ResizeImage(output_shape, image_shape); @@ -151,17 +149,14 @@ void ConcatFunctor::operator()(const std::vector::value, output, future); - } - else { + } else { MACE_NOT_IMPLEMENTED; } } }; -template -struct ConcatFunctor; -template -struct ConcatFunctor; +template struct ConcatFunctor; +template struct ConcatFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 5dc97944..3ed87e7c 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -47,21 +47,21 @@ extern void Conv2dOpencl(cl::Kernel *kernel, Tensor *output, StatsFuture *future); -template +template void Conv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, StatsFuture *future) { typedef void (*Conv2dOpenclFunction)( - cl::Kernel *kernel, - const Tensor *input, const Tensor *filter, const Tensor *bias, const int stride, - const int *padding, const int *dilations, const ActivationType activation, - const float relux_max_limit, const DataType dt, - Tensor *output, StatsFuture *future); + cl::Kernel * kernel, const Tensor *input, const Tensor *filter, + const Tensor *bias, const int stride, const int *padding, + const int *dilations, const ActivationType activation, + const float relux_max_limit, const DataType dt, Tensor *output, + StatsFuture *future); // Selection matrix: kernel_size x stride_size - static const Conv2dOpenclFunction selector[5] = - {Conv2dOpenclK1x1, nullptr, Conv2dOpenclK3x3, nullptr, nullptr}; + static const Conv2dOpenclFunction selector[5] = { + Conv2dOpenclK1x1, nullptr, Conv2dOpenclK3x3, nullptr, nullptr}; index_t kernel_h = filter->dim(0); index_t kernel_w = filter->dim(1); @@ -83,8 +83,9 @@ void Conv2dFunctor::operator()(const Tensor *input, padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), filter->shape().data(), paddings_.data(), - dilations_, strides_, RoundType::FLOOR, output_shape.data()); + CalcOutputSize(input->shape().data(), filter->shape().data(), + paddings_.data(), dilations_, strides_, RoundType::FLOOR, + output_shape.data()); } std::vector output_image_shape; @@ -94,18 +95,18 @@ void Conv2dFunctor::operator()(const Tensor *input, if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1]; - conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_, - relux_max_limit_, DataTypeToEnum::value, output, future); + conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), + dilations_, activation_, relux_max_limit_, + DataTypeToEnum::value, output, future); } else { - Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, - activation_, relux_max_limit_, DataTypeToEnum::value, output, future); + Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), + dilations_, activation_, relux_max_limit_, + DataTypeToEnum::value, output, future); } } -template -struct Conv2dFunctor; -template -struct Conv2dFunctor; +template struct Conv2dFunctor; +template struct Conv2dFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index b370b32b..41eaad56 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -66,20 +66,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, } auto runtime = OpenCLRuntime::Global(); - *kernel = - runtime->BuildKernel("conv_2d_1x1", kernel_name, built_options); + *kernel = runtime->BuildKernel("conv_2d_1x1", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, - *(input->opencl_image())); - kernel->setArg(idx++, - *(filter->opencl_image())); + kernel->setArg(idx++, *(input->opencl_image())); + kernel->setArg(idx++, *(filter->opencl_image())); if (bias != nullptr) { - kernel->setArg(idx++, - *(bias->opencl_image())); + kernel->setArg(idx++, *(bias->opencl_image())); } - kernel->setArg(idx++, - *(output->opencl_image())); + kernel->setArg(idx++, *(output->opencl_image())); // FIXME handle flexable data type: half not supported kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input_height)); @@ -100,6 +95,5 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); } - } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index a7eb668d..df2672c9 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -61,20 +61,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, } auto runtime = OpenCLRuntime::Global(); - *kernel = - runtime->BuildKernel("conv_2d_3x3", kernel_name, built_options); + *kernel = runtime->BuildKernel("conv_2d_3x3", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, - *(input->opencl_image())); - kernel->setArg(idx++, - *(filter->opencl_image())); + kernel->setArg(idx++, *(input->opencl_image())); + kernel->setArg(idx++, *(filter->opencl_image())); if (bias != nullptr) { - kernel->setArg(idx++, - *(bias->opencl_image())); + kernel->setArg(idx++, *(bias->opencl_image())); } - kernel->setArg(idx++, - *(output->opencl_image())); + kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index 5f3ffa5e..c317aa8c 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -61,20 +61,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel, } auto runtime = OpenCLRuntime::Global(); - *kernel = - runtime->BuildKernel("conv_2d", kernel_name, built_options); + *kernel = runtime->BuildKernel("conv_2d", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, - *(input->opencl_image())); - kernel->setArg(idx++, - *(filter->opencl_image())); + kernel->setArg(idx++, *(input->opencl_image())); + kernel->setArg(idx++, *(filter->opencl_image())); if (bias != nullptr) { - kernel->setArg(idx++, - *(bias->opencl_image())); + kernel->setArg(idx++, *(bias->opencl_image())); } - kernel->setArg(idx++, - *(output->opencl_image())); + kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 3bbd4f43..1b99188b 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -34,7 +34,7 @@ void DepthwiseConv2d(cl::Kernel *kernel, const index_t channel_blocks = RoundUpDiv4(channels); const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv4(width); - if(kernel->get() == nullptr) { + if (kernel->get() == nullptr) { const index_t input_batch = input->dim(0); const index_t input_height = input->dim(1); const index_t input_width = input->dim(2); @@ -78,18 +78,16 @@ void DepthwiseConv2d(cl::Kernel *kernel, LOG(FATAL) << "Unknown activation type: " << activation; } - *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options); + *kernel = + runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options); uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); - kernel->setArg( - idx++, *(filter->opencl_image())); + kernel->setArg(idx++, *(filter->opencl_image())); if (bias != nullptr) { - kernel->setArg( - idx++, *(bias->opencl_image())); + kernel->setArg(idx++, *(bias->opencl_image())); } - kernel->setArg( - idx++, *(output->opencl_image())); + kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input_height)); kernel->setArg(idx++, static_cast(input_width)); @@ -154,16 +152,17 @@ void DepthwiseConv2dFunctor::operator()( padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), fake_filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::FLOOR, output_shape.data()); + CalcOutputSize(input->shape().data(), fake_filter_shape.data(), + paddings_.data(), dilations_, strides_, RoundType::FLOOR, + output_shape.data()); } std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); output->ResizeImage(output_shape, output_image_shape); - DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, - activation_, relux_max_limit_, + DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), + dilations_, activation_, relux_max_limit_, DataTypeToEnum::value, output, future); } diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index 8c589c2f..82312c75 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -15,7 +15,6 @@ void EltwiseFunctor::operator()(const Tensor *input0, const Tensor *input1, Tensor *output, StatsFuture *future) { - const index_t batch = input0->dim(0); const index_t height = input0->dim(1); const index_t width = input0->dim(2); @@ -38,10 +37,8 @@ void EltwiseFunctor::operator()(const Tensor *input0, kernel_ = runtime->BuildKernel("eltwise", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, - *(input0->opencl_image())); - kernel_.setArg(idx++, - *(input1->opencl_image())); + kernel_.setArg(idx++, *(input0->opencl_image())); + kernel_.setArg(idx++, *(input1->opencl_image())); if (!coeff_.empty()) { kernel_.setArg(idx++, coeff_[0]); kernel_.setArg(idx++, coeff_[1]); @@ -49,17 +46,12 @@ void EltwiseFunctor::operator()(const Tensor *input0, kernel_.setArg(idx++, *(output->opencl_image())); } - const uint32_t gws[2] = { - static_cast(width_pixels), - static_cast(batch_height_pixels) - }; + const uint32_t gws[2] = {static_cast(width_pixels), + static_cast(batch_height_pixels)}; const std::vector lws = {64, 16, 1}; std::stringstream ss; - ss << "eltwise_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "eltwise_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) + << "_" << output->dim(2) << "_" << output->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); } diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index 4a4eacc1..0e208cf4 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -10,14 +10,13 @@ namespace mace { namespace kernels { -template +template void FullyConnectedFunctor::operator()( const Tensor *input, const Tensor *weight, const Tensor *bias, Tensor *output, StatsFuture *future) { - std::vector output_shape = {input->dim(0), 1, 1, weight->dim(0)}; std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); @@ -57,19 +56,16 @@ void FullyConnectedFunctor::operator()( default: LOG(FATAL) << "Unknown activation type: " << activation_; } - kernel_ = runtime->BuildKernel("fully_connected", kernel_name, built_options); + kernel_ = + runtime->BuildKernel("fully_connected", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, - *(input->opencl_image())); - kernel_.setArg(idx++, - *(weight->opencl_image())); + kernel_.setArg(idx++, *(input->opencl_image())); + kernel_.setArg(idx++, *(weight->opencl_image())); if (bias != nullptr) { - kernel_.setArg(idx++, - *(bias->opencl_image())); + kernel_.setArg(idx++, *(bias->opencl_image())); } - kernel_.setArg(idx++, - *(output->opencl_image())); + kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, static_cast(input->dim(1))); kernel_.setArg(idx++, static_cast(input->dim(2))); kernel_.setArg(idx++, static_cast(input->dim(3))); @@ -78,25 +74,18 @@ void FullyConnectedFunctor::operator()( } const uint32_t gws[2] = { - static_cast(batch), - static_cast(output_blocks), + static_cast(batch), static_cast(output_blocks), }; const std::vector lws = {16, 64, 1}; std::stringstream ss; - ss << "fc_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "fc_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) << "_" + << output->dim(2) << "_" << output->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); - }; -template -struct FullyConnectedFunctor; +template struct FullyConnectedFunctor; -template -struct FullyConnectedFunctor; +template struct FullyConnectedFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index ee141adb..791db167 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -3,8 +3,8 @@ // #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { @@ -28,8 +28,9 @@ void CalConv2dFilterImageShape(const std::vector &shape, /* HWOI */ } // [H * W * M, (Ic + 3) / 4] -void CalDepthwiseConv2dFilterImageShape(const std::vector &shape, /* HWIM */ - std::vector &image_shape) { +void CalDepthwiseConv2dFilterImageShape( + const std::vector &shape, /* HWIM */ + std::vector &image_shape) { MACE_CHECK(shape.size() == 4); image_shape.resize(2); image_shape[0] = shape[0] * shape[1] * shape[3]; @@ -47,8 +48,9 @@ void CalArgImageShape(const std::vector &shape, // Only support 3x3 now // [ (Ic + 3) / 4, 16 * Oc] -void CalWinogradFilterImageShape(const std::vector &shape, /* Oc, Ic, H, W*/ - std::vector &image_shape) { +void CalWinogradFilterImageShape( + const std::vector &shape, /* Oc, Ic, H, W*/ + std::vector &image_shape) { MACE_CHECK(shape.size() == 4); image_shape.resize(2); image_shape[0] = RoundUpDiv4(shape[1]); @@ -115,19 +117,16 @@ void CalImage2DShape(const std::vector &shape, /* NHWC */ } } - std::vector CalWinogradShape(const std::vector &shape, const BufferType type) { if (type == WINOGRAD_FILTER) { return {16, shape[0], shape[1], 1}; - }else if (type == IN_OUT_HEIGHT) { - index_t out_width = shape[0] * - ((shape[1] - 1) / 2) * - ((shape[2] - 1) / 2); + } else if (type == IN_OUT_HEIGHT) { + index_t out_width = shape[0] * ((shape[1] - 1) / 2) * ((shape[2] - 1) / 2); return {16, shape[3], out_width, 1}; } else { LOG(FATAL) << "Mace not supported yet."; - return std::vector(); + return std::vector(); } } @@ -188,10 +187,10 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, std::vector local_ws(3, 0); local_ws[0] = std::min(gws[0], kwg_size); local_ws[1] = std::min(gws[1], kwg_size / local_ws[0]); - local_ws[2] = std::min(gws[2], - kwg_size / (local_ws[0] * local_ws[1])); + local_ws[2] = + std::min(gws[2], kwg_size / (local_ws[0] * local_ws[1])); return { - // TODO tuning these magic numbers + // TODO tuning these magic numbers {local_ws[0], local_ws[1], local_ws[2], 1}, {kwg_size / 16, 4, 4, 1}, {kwg_size / 32, 4, 8, 1}, @@ -217,20 +216,20 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, }; }; cl::Event event; - auto func = [&](const std::vector ¶ms, - Timer *timer, + auto func = [&](const std::vector ¶ms, Timer *timer, std::vector *tuning_result) -> cl_int { - MACE_CHECK(params.size() == 4) << "Tuning parameters of 3D kernel must be 4D"; + MACE_CHECK(params.size() == 4) + << "Tuning parameters of 3D kernel must be 4D"; cl_int error = CL_SUCCESS; if (timer == nullptr) { uint32_t num_blocks = params[3]; const uint32_t block_size = gws[2] / num_blocks; if (gws[2] % num_blocks > 0) num_blocks++; for (uint32_t i = 0; i < num_blocks; ++i) { - uint32_t gws2 = (i == num_blocks - 1) ? (gws[2] - (i * block_size)) : block_size; + uint32_t gws2 = + (i == num_blocks - 1) ? (gws[2] - (i * block_size)) : block_size; error = runtime->command_queue().enqueueNDRangeKernel( - kernel, - cl::NDRange(0, 0, i * block_size), + kernel, cl::NDRange(0, 0, i * block_size), cl::NDRange(gws[0], gws[1], gws2), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; @@ -247,15 +246,16 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, if (LimitKernelTime()) { double elapse_time = timer->AccumulatedMicros(); timer->ClearTiming(); - uint32_t num_blocks = std::min(static_cast(elapse_time / kMaxKernelExeTime) + 1, gws[2]); + uint32_t num_blocks = std::min( + static_cast(elapse_time / kMaxKernelExeTime) + 1, gws[2]); (*tuning_result)[3] = num_blocks; const uint32_t block_size = gws[2] / num_blocks; if (gws[2] % num_blocks > 0) num_blocks++; for (uint32_t i = 0; i < num_blocks; ++i) { - uint32_t gws2 = (i == num_blocks - 1) ? (gws[2] - (i * block_size)) : block_size; + uint32_t gws2 = + (i == num_blocks - 1) ? (gws[2] - (i * block_size)) : block_size; error = runtime->command_queue().enqueueNDRangeKernel( - kernel, - cl::NDRange(0, 0, i * block_size), + kernel, cl::NDRange(0, 0, i * block_size), cl::NDRange(gws[0], gws[1], gws2), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; @@ -300,34 +300,30 @@ void TuningOrRun2DKernel(cl::Kernel &kernel, {kwg_size / 256, 256, 1}, {kwg_size / 512, 512, 1}, {kwg_size, 1, 1}, - {1, kwg_size, 1} - }; + {1, kwg_size, 1}}; }; cl::Event event; - auto func = [&](const std::vector ¶ms, - Timer *timer, + auto func = [&](const std::vector ¶ms, Timer *timer, std::vector *tuning_result) -> cl_int { - MACE_CHECK(params.size() == 3) << "Tuning parameters of 2D kernel must be 3d"; + MACE_CHECK(params.size() == 3) + << "Tuning parameters of 2D kernel must be 3d"; cl_int error = CL_SUCCESS; if (timer == nullptr) { uint32_t num_blocks = params[2]; const uint32_t block_size = gws[1] / num_blocks; if (gws[1] % num_blocks > 0) num_blocks++; for (uint32_t i = 0; i < num_blocks; ++i) { - uint32_t gws1 = (i == num_blocks - 1) ? (gws[1] - (i * block_size)) : block_size; + uint32_t gws1 = + (i == num_blocks - 1) ? (gws[1] - (i * block_size)) : block_size; error = runtime->command_queue().enqueueNDRangeKernel( - kernel, - cl::NDRange(0, i * block_size), - cl::NDRange(gws[0], gws1), - cl::NDRange(params[0], params[1]), - nullptr, &event); + kernel, cl::NDRange(0, i * block_size), cl::NDRange(gws[0], gws1), + cl::NDRange(params[0], params[1]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; } } else { timer->ClearTiming(); error = runtime->command_queue().enqueueNDRangeKernel( - kernel, cl::NullRange, - cl::NDRange(gws[0], gws[1]), + kernel, cl::NullRange, cl::NDRange(gws[0], gws[1]), cl::NDRange(params[0], params[1]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; timer->AccumulateTiming(); @@ -336,16 +332,16 @@ void TuningOrRun2DKernel(cl::Kernel &kernel, if (LimitKernelTime()) { double elapse_time = timer->AccumulatedMicros(); timer->ClearTiming(); - uint32_t num_blocks = std::min(static_cast(elapse_time / kMaxKernelExeTime) + 1, gws[1]); + uint32_t num_blocks = std::min( + static_cast(elapse_time / kMaxKernelExeTime) + 1, gws[1]); (*tuning_result)[2] = num_blocks; const uint32_t block_size = gws[1] / num_blocks; if (gws[1] % num_blocks > 0) num_blocks++; for (uint32_t i = 0; i < num_blocks; ++i) { - uint32_t gws1 = (i == num_blocks - 1) ? (gws[1] - (i * block_size)) : block_size; + uint32_t gws1 = + (i == num_blocks - 1) ? (gws[1] - (i * block_size)) : block_size; error = runtime->command_queue().enqueueNDRangeKernel( - kernel, - cl::NDRange(0, i * block_size), - cl::NDRange(gws[0], gws1), + kernel, cl::NDRange(0, i * block_size), cl::NDRange(gws[0], gws1), cl::NDRange(params[0], params[1]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; timer->AccumulateTiming(); @@ -355,11 +351,8 @@ void TuningOrRun2DKernel(cl::Kernel &kernel, return error; }; OpenCLProfilingTimer timer(&event); - Tuner::Get()->template TuneOrRun(tuning_key, - lws, - params_generator, - func, - &timer); + Tuner::Get()->template TuneOrRun( + tuning_key, lws, params_generator, func, &timer); if (future != nullptr) { future->wait_fn = [runtime, event](CallStats *stats) { event.wait(); @@ -368,7 +361,6 @@ void TuningOrRun2DKernel(cl::Kernel &kernel, } }; } - } } // namespace kernels diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h index 36e9827f..19cc6ff3 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/kernels/opencl/helper.h @@ -5,16 +5,16 @@ #ifndef MACE_KERNELS_OPENCL_HELPER_H_ #define MACE_KERNELS_OPENCL_HELPER_H_ +#include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/types.h" #include "mace/utils/utils.h" -#include "mace/core/future.h" namespace mace { namespace kernels { -const float kMaxKernelExeTime = 1000.0; // microseconds +const float kMaxKernelExeTime = 1000.0; // microseconds enum BufferType { CONV2D_FILTER = 0, @@ -31,7 +31,7 @@ void CalImage2DShape(const std::vector &shape, /* NHWC */ const BufferType type, std::vector &image_shape); -std::vector CalWinogradShape(const std::vector &shape, +std::vector CalWinogradShape(const std::vector &shape, const BufferType type); std::string DtToCLCMDDt(const DataType dt); @@ -48,7 +48,6 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, const std::vector &lws, StatsFuture *future); - void TuningOrRun2DKernel(cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, @@ -72,12 +71,12 @@ inline bool LimitKernelTime() { } namespace { -template +template void AppendToStream(std::stringstream *ss, const std::string &delimiter, T v) { (*ss) << v; } -template +template void AppendToStream(std::stringstream *ss, const std::string &delimiter, T first, @@ -87,7 +86,7 @@ void AppendToStream(std::stringstream *ss, } } // namespace -template +template std::string Concat(Args... args) { std::stringstream ss; AppendToStream(&ss, "_", args...); diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index 77560853..d453c293 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -11,12 +11,10 @@ namespace mace { namespace kernels { template -void MatMulFunctor::operator()( - const Tensor *A, - const Tensor *B, - Tensor *C, - StatsFuture *future) { - +void MatMulFunctor::operator()(const Tensor *A, + const Tensor *B, + Tensor *C, + StatsFuture *future) { std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; std::vector c_image_shape; CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, c_image_shape); @@ -41,8 +39,7 @@ void MatMulFunctor::operator()( uint32_t idx = 0; kernel_.setArg(idx++, *(A->opencl_image())); - kernel_.setArg(idx++, - *(B->opencl_image())); + kernel_.setArg(idx++, *(B->opencl_image())); kernel_.setArg(idx++, *(C->opencl_image())); kernel_.setArg(idx++, static_cast(height)); kernel_.setArg(idx++, static_cast(width)); @@ -57,20 +54,14 @@ void MatMulFunctor::operator()( }; const std::vector lws = {16, 64, 1}; std::stringstream ss; - ss << "matmul_opencl_kernel_" - << C->dim(0) << "_" - << C->dim(1) << "_" - << C->dim(2) << "_" - << C->dim(3); + ss << "matmul_opencl_kernel_" << C->dim(0) << "_" << C->dim(1) << "_" + << C->dim(2) << "_" << C->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); - }; -template -struct MatMulFunctor; +template struct MatMulFunctor; -template -struct MatMulFunctor; +template struct MatMulFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 1272a4fb..d9256776 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -11,17 +11,15 @@ namespace mace { namespace kernels { -template +template void PoolingFunctor::operator()(const Tensor *input, Tensor *output, StatsFuture *future) { MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1) - << "Pooling opencl kernel not support dilation yet"; + << "Pooling opencl kernel not support dilation yet"; std::vector output_shape(4); - std::vector filter_shape = { - kernels_[0], kernels_[1], - input->dim(3), input->dim(3) - }; + std::vector filter_shape = {kernels_[0], kernels_[1], input->dim(3), + input->dim(3)}; std::vector paddings(2); if (paddings_.empty()) { @@ -77,24 +75,17 @@ void PoolingFunctor::operator()(const Tensor *input, } const uint32_t gws[3] = { - static_cast(channel_blocks), - static_cast(out_width), + static_cast(channel_blocks), static_cast(out_width), static_cast(batch * out_height), }; std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << "pooling_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "pooling_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) + << "_" << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); - } -template -struct PoolingFunctor; -template -struct PoolingFunctor; +template struct PoolingFunctor; +template struct PoolingFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index 5761d3cb..470a335d 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -2,12 +2,12 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include "mace/kernels/resize_bilinear.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" -#include "mace/kernels/resize_bilinear.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { @@ -29,14 +29,14 @@ void ResizeBilinearFunctor::operator()( std::vector output_shape{batch, out_height, out_width, channels}; std::vector output_image_shape; - CalImage2DShape(output_shape, - BufferType::IN_OUT_CHANNEL, + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); output->ResizeImage(output_shape, output_image_shape); float height_scale = CalculateResizeScale(in_height, out_height, align_corners_); - float width_scale = CalculateResizeScale(in_width, out_width, align_corners_); + float width_scale = + CalculateResizeScale(in_width, out_width, align_corners_); auto runtime = OpenCLRuntime::Global(); std::set built_options; @@ -45,7 +45,8 @@ void ResizeBilinearFunctor::operator()( auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options); + kernel_ = + runtime->BuildKernel("resize_bilinear", kernel_name, built_options); uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); @@ -62,11 +63,8 @@ void ResizeBilinearFunctor::operator()( static_cast(out_height * batch)}; const std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << "resize_bilinear_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "resize_bilinear_opencl_kernel_" << output->dim(0) << "_" + << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); } diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index a3336aa6..25e1c9e4 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -6,13 +6,13 @@ #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { -template +template void SoftmaxFunctor::operator()(const Tensor *logits, Tensor *output, StatsFuture *future) { @@ -45,17 +45,12 @@ void SoftmaxFunctor::operator()(const Tensor *logits, static_cast(height * batch)}; const std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << "softmax_opencl_kernel_" - << output->dim(0) << "_" - << output->dim(1) << "_" - << output->dim(2) << "_" - << output->dim(3); + ss << "softmax_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) + << "_" << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); } -template -struct SoftmaxFunctor; -template -struct SoftmaxFunctor; +template struct SoftmaxFunctor; +template struct SoftmaxFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc index 2eb06027..0cecb0a7 100644 --- a/mace/kernels/opencl/space_to_batch_opencl.cc +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -5,20 +5,21 @@ #ifndef MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ #define MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/space_to_batch.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { template -void SpaceToBatchFunctor::operator()(Tensor *space_tensor, - const std::vector &output_shape, - Tensor *batch_tensor, - StatsFuture *future) { +void SpaceToBatchFunctor::operator()( + Tensor *space_tensor, + const std::vector &output_shape, + Tensor *batch_tensor, + StatsFuture *future) { const char *kernel_name = nullptr; std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); @@ -37,8 +38,10 @@ void SpaceToBatchFunctor::operator()(Tensor *space_tensor kernel_name_ss << "-D" << kernel_name << "=" << obfuscated_kernel_name; built_options.emplace(kernel_name_ss.str()); built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum::value)); - kernel_ = runtime->BuildKernel("space_to_batch", kernel_name, built_options); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToCLCMDDt(DataTypeToEnum::value)); + kernel_ = + runtime->BuildKernel("space_to_batch", kernel_name, built_options); uint32_t idx = 0; if (b2s_) { @@ -59,15 +62,13 @@ void SpaceToBatchFunctor::operator()(Tensor *space_tensor } const uint32_t chan_blk = RoundUpDiv4(batch_tensor->dim(3)); - const uint32_t gws[3] = {chan_blk, - static_cast(batch_tensor->dim(2)), - static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; + const uint32_t gws[3] = { + chan_blk, static_cast(batch_tensor->dim(2)), + static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; const std::vector lws = {8, 16, 8, 1}; std::stringstream ss; - ss << kernel_name << "_" - << batch_tensor->dim(0) << "_" - << batch_tensor->dim(1) << "_" - << batch_tensor->dim(2) << "_" + ss << kernel_name << "_" << batch_tensor->dim(0) << "_" + << batch_tensor->dim(1) << "_" << batch_tensor->dim(2) << "_" << batch_tensor->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); } diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index 8fd17f21..ee7d5d12 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -11,21 +11,21 @@ namespace mace { namespace kernels { -template -void WinogradTransformFunctor::operator()(const Tensor *input_tensor, - Tensor *output_tensor, - StatsFuture *future) { +template +void WinogradTransformFunctor::operator()( + const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future) { std::vector output_shape(4); std::vector filter_shape = {3, 3, input_tensor->dim(3), 1}; std::vector paddings(2); if (paddings_.empty()) { kernels::CalcNHWCPaddingAndOutputSize( - input_tensor->shape().data(), filter_shape.data(), dilations_.data(), strides_.data(), - padding_type_, output_shape.data(), paddings.data()); + input_tensor->shape().data(), filter_shape.data(), dilations_.data(), + strides_.data(), padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), paddings_.data(), - dilations_.data(), strides_.data(), RoundType::FLOOR, output_shape.data()); + CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), + paddings_.data(), dilations_.data(), strides_.data(), + RoundType::FLOOR, output_shape.data()); } const index_t round_h = (output_shape[1] + 1) / 2; @@ -38,14 +38,16 @@ void WinogradTransformFunctor::operator()(const Tensor *i CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape); output_tensor->ResizeImage(output_shape, image_shape); - std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2"); + std::string obfuscated_kernel_name = + MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2"); std::set built_options; built_options.emplace("-Dwinograd_transform_2x2=" + obfuscated_kernel_name); - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-DDATA_TYPE=" + + DtToUpstreamCLDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); auto runtime = OpenCLRuntime::Global(); - kernel_ = runtime->BuildKernel("winograd_transform", - obfuscated_kernel_name, + kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, built_options); uint32_t idx = 0; @@ -60,34 +62,39 @@ void WinogradTransformFunctor::operator()(const Tensor *i kernel_.setArg(idx++, static_cast(paddings[1] / 2)); } - const uint32_t gws[2] = {static_cast(out_width), - static_cast(RoundUpDiv4(input_tensor->dim(3)))}; + const uint32_t gws[2] = { + static_cast(out_width), + static_cast(RoundUpDiv4(input_tensor->dim(3)))}; const std::vector lws = {128, 8, 1}; std::stringstream ss; - ss << "winograd_transform_kernel_" - << input_tensor->dim(0) << "_" - << input_tensor->dim(1) << "_" - << input_tensor->dim(2) << "_" + ss << "winograd_transform_kernel_" << input_tensor->dim(0) << "_" + << input_tensor->dim(1) << "_" << input_tensor->dim(2) << "_" << input_tensor->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); } -template -void WinogradInverseTransformFunctor::operator()(const Tensor *input_tensor, - const Tensor *bias, - Tensor *output_tensor, - StatsFuture *future) { - std::vector output_shape = {batch_, height_, width_, input_tensor->dim(1)}; +template +void WinogradInverseTransformFunctor::operator()( + const Tensor *input_tensor, + const Tensor *bias, + Tensor *output_tensor, + StatsFuture *future) { + std::vector output_shape = {batch_, height_, width_, + input_tensor->dim(1)}; std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); output_tensor->ResizeImage(output_shape, image_shape); if (kernel_.get() == nullptr) { - std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_2x2"); + std::string obfuscated_kernel_name = + MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_2x2"); std::set built_options; - built_options.emplace("-Dwinograd_inverse_transform_2x2=" + obfuscated_kernel_name); - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-Dwinograd_inverse_transform_2x2=" + + obfuscated_kernel_name); + built_options.emplace("-DDATA_TYPE=" + + DtToUpstreamCLDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); built_options.emplace(bias != nullptr ? "-DBIAS" : ""); switch (activation_) { case NOOP: @@ -112,18 +119,21 @@ void WinogradInverseTransformFunctor::operator()(const Te } auto runtime = OpenCLRuntime::Global(); - kernel_ = runtime->BuildKernel("winograd_transform", - obfuscated_kernel_name, + kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, built_options); const uint32_t round_h = (height_ + 1) / 2; const uint32_t round_w = (width_ + 1) / 2; uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input_tensor->opencl_image()))); + kernel_.setArg( + idx++, + *(static_cast(input_tensor->opencl_image()))); if (bias != nullptr) { - kernel_.setArg(idx++, *(static_cast(bias->opencl_image()))); + kernel_.setArg(idx++, + *(static_cast(bias->opencl_image()))); } - kernel_.setArg(idx++, *(static_cast(output_tensor->opencl_image()))); + kernel_.setArg( + idx++, *(static_cast(output_tensor->opencl_image()))); kernel_.setArg(idx++, static_cast(output_shape[1])); kernel_.setArg(idx++, static_cast(output_shape[2])); kernel_.setArg(idx++, static_cast(round_h * round_w)); @@ -131,28 +141,23 @@ void WinogradInverseTransformFunctor::operator()(const Te kernel_.setArg(idx++, relux_max_limit_); } - const uint32_t gws[2] = {static_cast(input_tensor->dim(2)), - static_cast(RoundUpDiv4(input_tensor->dim(1)))}; + const uint32_t gws[2] = { + static_cast(input_tensor->dim(2)), + static_cast(RoundUpDiv4(input_tensor->dim(1)))}; const std::vector lws = {128, 8, 1}; std::stringstream ss; - ss << "winograd_inverse_transform_kernel_" - << input_tensor->dim(0) << "_" - << input_tensor->dim(1) << "_" - << input_tensor->dim(2) << "_" + ss << "winograd_inverse_transform_kernel_" << input_tensor->dim(0) << "_" + << input_tensor->dim(1) << "_" << input_tensor->dim(2) << "_" << input_tensor->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); } -template -struct WinogradTransformFunctor; -template -struct WinogradTransformFunctor; +template struct WinogradTransformFunctor; +template struct WinogradTransformFunctor; -template -struct WinogradInverseTransformFunctor; -template -struct WinogradInverseTransformFunctor; +template struct WinogradInverseTransformFunctor; +template struct WinogradInverseTransformFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index dbbfaefc..a2d3bcdb 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -7,9 +7,9 @@ #include #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/conv_pool_2d_util.h" -#include "mace/core/runtime/opencl/cl2_header.h" namespace mace { @@ -42,7 +42,7 @@ struct PoolingFunctorBase { const int *dilations_; }; -template +template struct PoolingFunctor : PoolingFunctorBase { PoolingFunctor(const PoolingType pooling_type, const int *kernels, @@ -50,29 +50,27 @@ struct PoolingFunctor : PoolingFunctorBase { const Padding padding_type, const std::vector &paddings, const int *dilations) - : PoolingFunctorBase(pooling_type, kernels, - strides, padding_type, - paddings, dilations) {} + : PoolingFunctorBase( + pooling_type, kernels, strides, padding_type, paddings, dilations) { + } void operator()(const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future) { - std::vector output_shape(4); std::vector filter_shape = { - kernels_[0], kernels_[1], - input_tensor->dim(3), input_tensor->dim(3) - }; + kernels_[0], kernels_[1], input_tensor->dim(3), input_tensor->dim(3)}; std::vector paddings(2); if (paddings_.empty()) { kernels::CalcNHWCPaddingAndOutputSize( - input_tensor->shape().data(), filter_shape.data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); + input_tensor->shape().data(), filter_shape.data(), dilations_, + strides_, padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::CEIL, output_shape.data()); + CalcOutputSize(input_tensor->shape().data(), filter_shape.data(), + paddings_.data(), dilations_, strides_, RoundType::CEIL, + output_shape.data()); } output_tensor->Resize(output_shape); @@ -110,7 +108,8 @@ struct PoolingFunctor : PoolingFunctorBase { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { for (int c = 0; c < channels; ++c) { - index_t out_offset = (((b * height) + h) * width + w) * channels + c; + index_t out_offset = + (((b * height) + h) * width + w) * channels + c; index_t in_offset = b * in_image_size * input_channels + c; T res = std::numeric_limits::lowest(); for (int kh = 0; kh < kernel_h; ++kh) { @@ -119,7 +118,8 @@ struct PoolingFunctor : PoolingFunctorBase { int inw = padded_w_start + w * stride_w + dilation_w * kw; if (inh >= 0 && inh < input_height && inw >= 0 && inw < input_width) { - index_t input_offset = in_offset + (inh * input_width + inw) * input_channels; + index_t input_offset = + in_offset + (inh * input_width + inw) * input_channels; res = std::max(res, input[input_offset]); } } @@ -135,7 +135,8 @@ struct PoolingFunctor : PoolingFunctorBase { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { for (int c = 0; c < channels; ++c) { - index_t out_offset = (((b * height) + h) * width + w) * channels + c; + index_t out_offset = + (((b * height) + h) * width + w) * channels + c; index_t in_offset = b * in_image_size * input_channels + c; T sum = 0; int block_size = 0; @@ -145,7 +146,8 @@ struct PoolingFunctor : PoolingFunctorBase { int inw = padded_w_start + w * stride_w + dilation_w * kw; if (inh >= 0 && inh < input_height && inw >= 0 && inw < input_width) { - index_t input_offset = in_offset + (inh * input_width + inw) * input_channels; + index_t input_offset = + in_offset + (inh * input_width + inw) * input_channels; sum += input[input_offset]; block_size += 1; } @@ -158,16 +160,13 @@ struct PoolingFunctor : PoolingFunctorBase { } } } - }; -template<> +template <> void PoolingFunctor::operator()( - const Tensor *input_tensor, - Tensor *output_tensor, - StatsFuture *future); + const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future); -template +template struct PoolingFunctor : PoolingFunctorBase { PoolingFunctor(const PoolingType pooling_type, const int *kernels, @@ -175,9 +174,9 @@ struct PoolingFunctor : PoolingFunctorBase { const Padding padding_type, const std::vector &paddings, const int *dilations) - : PoolingFunctorBase(pooling_type, kernels, - strides, padding_type, - paddings, dilations) {} + : PoolingFunctorBase( + pooling_type, kernels, strides, padding_type, paddings, dilations) { + } void operator()(const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future); diff --git a/mace/kernels/reshape.h b/mace/kernels/reshape.h index 4d37a199..544ba360 100644 --- a/mace/kernels/reshape.h +++ b/mace/kernels/reshape.h @@ -5,8 +5,8 @@ #define MACE_KERNELS_RESHAPE_H_ #include "mace/core/future.h" -#include "mace/core/tensor.h" #include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" namespace mace { namespace kernels { @@ -25,7 +25,6 @@ struct ReshapeFunctor { } }; - } // namespace kernels } // namespace mace diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 1762cb3b..bdd94192 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -5,8 +5,8 @@ #define MACE_KERNELS_RESIZE_BILINEAR_H_ #include "mace/core/future.h" -#include "mace/core/tensor.h" #include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/core/tensor.h" namespace mace { namespace kernels { @@ -74,9 +74,9 @@ void ResizeImage(const T *images, const T *batch_input_ptr = images + in_batch_num_values * b; T *batch_output_ptr = output + out_batch_num_values * b; const T *y_lower_input_ptr = - batch_input_ptr + ys[y].lower * in_width * channels; + batch_input_ptr + ys[y].lower * in_width * channels; const T *y_upper_input_ptr = - batch_input_ptr + ys[y].upper * in_width * channels; + batch_input_ptr + ys[y].upper * in_width * channels; T *y_output_ptr = batch_output_ptr + y * out_width * channels; const float ys_lerp = ys[y].lerp; @@ -95,7 +95,7 @@ void ResizeImage(const T *images, const T bottom_right = bottom_right_ptr[c]; output_ptr[c] = ComputeLerp(top_left, top_right, bottom_left, - bottom_right, xs_lerp, ys_lerp); + bottom_right, xs_lerp, ys_lerp); } } } @@ -107,10 +107,10 @@ struct ResizeBilinearFunctorBase { ResizeBilinearFunctorBase(const std::vector &size, bool align_corners) : align_corners_(align_corners) { - MACE_CHECK(size.size() == 2); - out_height_ = size[0]; - out_width_ = size[1]; - } + MACE_CHECK(size.size() == 2); + out_height_ = size[0]; + out_width_ = size[1]; + } protected: bool align_corners_; @@ -163,8 +163,9 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { } }; -template -struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { +template +struct ResizeBilinearFunctor + : ResizeBilinearFunctorBase { ResizeBilinearFunctor(const std::vector &size, bool align_corners) : ResizeBilinearFunctorBase(size, align_corners) {} diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index 4b4c15f2..402bf97c 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -6,9 +6,9 @@ #define MACE_KERNELS_CONV_2D_H_ #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" -#include "mace/core/runtime/opencl/cl2_header.h" namespace mace { namespace kernels { @@ -16,11 +16,10 @@ namespace kernels { struct SpaceToBatchFunctorBase { SpaceToBatchFunctorBase(const std::vector &paddings, const std::vector &block_shape, - bool b2s): - paddings_(paddings.begin(), paddings.end()), - block_shape_(block_shape.begin(), block_shape.end()), - b2s_(b2s) - {} + bool b2s) + : paddings_(paddings.begin(), paddings.end()), + block_shape_(block_shape.begin(), block_shape.end()), + b2s_(b2s) {} std::vector paddings_; std::vector block_shape_; @@ -28,10 +27,11 @@ struct SpaceToBatchFunctorBase { }; template -struct SpaceToBatchFunctor : SpaceToBatchFunctorBase{ +struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { SpaceToBatchFunctor(const std::vector &paddings, const std::vector &block_shape, - bool b2s): SpaceToBatchFunctorBase(paddings, block_shape, b2s){} + bool b2s) + : SpaceToBatchFunctorBase(paddings, block_shape, b2s) {} void operator()(Tensor *space_tensor, const std::vector &output_shape, @@ -42,10 +42,11 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase{ }; template -struct SpaceToBatchFunctor: SpaceToBatchFunctorBase{ +struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { SpaceToBatchFunctor(const std::vector &paddings, const std::vector &block_shape, - bool b2s): SpaceToBatchFunctorBase(paddings, block_shape, b2s){} + bool b2s) + : SpaceToBatchFunctorBase(paddings, block_shape, b2s) {} void operator()(Tensor *space_tensor, const std::vector &output_shape, @@ -53,7 +54,6 @@ struct SpaceToBatchFunctor: SpaceToBatchFunctorBase{ StatsFuture *future); cl::Kernel kernel_; - }; } // namespace kernels diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h index fdab5c7c..464a59ce 100644 --- a/mace/kernels/winograd_transform.h +++ b/mace/kernels/winograd_transform.h @@ -6,10 +6,10 @@ #define MACE_KERNELS_WINOGRAD_TRANSFORM_H_ #include "mace/core/future.h" +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" -#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/activation.h" -#include "mace/core/runtime/opencl/cl2_header.h" +#include "mace/kernels/conv_pool_2d_util.h" namespace mace { namespace kernels { @@ -17,38 +17,36 @@ namespace kernels { struct WinogradTransformFunctorBase { WinogradTransformFunctorBase(const Padding &padding_type, const std::vector &paddings) - : strides_({1, 1}), dilations_({1, 1}), - padding_type_(padding_type), paddings_(paddings) {} + : strides_({1, 1}), + dilations_({1, 1}), + padding_type_(padding_type), + paddings_(paddings) {} - const std::vector strides_; // [stride_h, stride_w] - const std::vector dilations_; // [dilation_h, dilation_w] + const std::vector strides_; // [stride_h, stride_w] + const std::vector dilations_; // [dilation_h, dilation_w] Padding padding_type_; std::vector paddings_; }; -template +template struct WinogradTransformFunctor : WinogradTransformFunctorBase { WinogradTransformFunctor(const Padding &padding_type, const std::vector &paddings) : WinogradTransformFunctorBase(padding_type, paddings) {} - void operator()(const Tensor *input, - Tensor *output, - StatsFuture *future) { + void operator()(const Tensor *input, Tensor *output, StatsFuture *future) { MACE_NOT_IMPLEMENTED; } - }; -template -struct WinogradTransformFunctor : WinogradTransformFunctorBase { +template +struct WinogradTransformFunctor + : WinogradTransformFunctorBase { WinogradTransformFunctor(const Padding &padding_type, const std::vector &paddings) : WinogradTransformFunctorBase(padding_type, paddings) {} - void operator()(const Tensor *input, - Tensor *output, - StatsFuture *future); + void operator()(const Tensor *input, Tensor *output, StatsFuture *future); cl::Kernel kernel_; }; @@ -72,14 +70,15 @@ struct WinogradInverseTransformFunctorBase { const float relux_max_limit_; }; -template +template struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { WinogradInverseTransformFunctor(const int batch, const int height, const int width, const ActivationType activation, const float relux_max_limit) - : WinogradInverseTransformFunctorBase(batch, height, width, activation, relux_max_limit) {} + : WinogradInverseTransformFunctorBase( + batch, height, width, activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *bias, @@ -87,17 +86,18 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { StatsFuture *future) { MACE_NOT_IMPLEMENTED; } - }; -template -struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { +template +struct WinogradInverseTransformFunctor + : WinogradInverseTransformFunctorBase { WinogradInverseTransformFunctor(const int batch, const int height, const int width, const ActivationType activation, const float relux_max_limit) - : WinogradInverseTransformFunctorBase(batch, height, width, activation, relux_max_limit) {} + : WinogradInverseTransformFunctorBase( + batch, height, width, activation, relux_max_limit) {} void operator()(const Tensor *input, const Tensor *bias, diff --git a/mace/ops/activation.h b/mace/ops/activation.h index a55dfe1a..5f08bc26 100644 --- a/mace/ops/activation.h +++ b/mace/ops/activation.h @@ -22,7 +22,8 @@ class ActivationOp : public Operator { bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(0); - const Tensor *alpha_tensor = this->InputSize() >= 2 ? this->Input(1) : nullptr; + const Tensor *alpha_tensor = + this->InputSize() >= 2 ? this->Input(1) : nullptr; Tensor *output_tensor = this->outputs_[0]; output_tensor->ResizeLike(input_tensor); diff --git a/mace/ops/activation_test.cc b/mace/ops/activation_test.cc index ce5ddd45..77f8e745 100644 --- a/mace/ops/activation_test.cc +++ b/mace/ops/activation_test.cc @@ -214,9 +214,7 @@ void TestSimplePrelu() { net.AddInputFromArray( "Input", {2, 2, 2, 2}, {-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0}); - net.AddInputFromArray( - "Alpha", {2}, - {2.0, 3.0}); + net.AddInputFromArray("Alpha", {2}, {2.0, 3.0}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input", "InputImage", @@ -250,7 +248,8 @@ void TestSimplePrelu() { } auto expected = CreateTensor( - {2, 2, 2, 2}, {-14, 7, -12, 6, -10, -15, -8, -12, -6, 3, -4, 2, -2, -3, 0, 0}); + {2, 2, 2, 2}, + {-14, 7, -12, 6, -10, -15, -8, -12, -6, 3, -4, 2, -2, -3, 0, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } diff --git a/mace/ops/addn.h b/mace/ops/addn.h index 9adc3341..5824844b 100644 --- a/mace/ops/addn.h +++ b/mace/ops/addn.h @@ -26,12 +26,10 @@ class AddNOp : public Operator { for (int i = 1; i < n; ++i) { inputs[i] = this->Input(i); MACE_CHECK(inputs[0]->dim_size() == inputs[i]->dim_size()); - MACE_CHECK(inputs[0]->size() == inputs[i]->size()) << "Input 0: " - << MakeString(inputs[0]->shape()) - << ", size: " << inputs[0]->size() - << ". Input " << i << ": " - << MakeString(inputs[i]->shape()) - << ", size: " << inputs[i]->size(); + MACE_CHECK(inputs[0]->size() == inputs[i]->size()) + << "Input 0: " << MakeString(inputs[0]->shape()) + << ", size: " << inputs[0]->size() << ". Input " << i << ": " + << MakeString(inputs[i]->shape()) << ", size: " << inputs[i]->size(); } functor_(inputs, output_tensor, future); diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index 8ffccad2..85c7853d 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -15,8 +15,7 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { OpsTestNet net; // Add input data for (int i = 0; i < inputs; ++i) { - net.AddRandomInput(MakeString("Input", i).c_str(), - {n, h, w, c}); + net.AddRandomInput(MakeString("Input", i).c_str(), {n, h, w, c}); } if (D == DeviceType::OPENCL) { diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index b0975aa3..5091e26e 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -76,7 +76,7 @@ static void BatchNorm( static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BatchNorm(iters, N, C, H, W); \ } \ diff --git a/mace/ops/batch_to_space.h b/mace/ops/batch_to_space.h index 59f8e03b..91d2c0c6 100644 --- a/mace/ops/batch_to_space.h +++ b/mace/ops/batch_to_space.h @@ -12,15 +12,14 @@ namespace mace { -template +template class BatchToSpaceNDOp : public Operator { public: BatchToSpaceNDOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), - functor_( - OperatorBase::GetRepeatedArgument("crops", {0, 0, 0, 0}), - OperatorBase::GetRepeatedArgument("block_shape", {1, 1}), - true) {} + functor_(OperatorBase::GetRepeatedArgument("crops", {0, 0, 0, 0}), + OperatorBase::GetRepeatedArgument("block_shape", {1, 1}), + true) {} bool Run(StatsFuture *future) override { const Tensor *batch_tensor = this->Input(INPUT); @@ -28,7 +27,8 @@ class BatchToSpaceNDOp : public Operator { std::vector output_shape(4, 0); CalculateOutputShape(batch_tensor, space_tensor, output_shape.data()); - functor_(space_tensor, output_shape, const_cast(batch_tensor), future); + functor_(space_tensor, output_shape, const_cast(batch_tensor), + future); return true; } @@ -37,7 +37,8 @@ class BatchToSpaceNDOp : public Operator { Tensor *output, index_t *output_shape) { auto crops = OperatorBase::GetRepeatedArgument("crops", {0, 0, 0, 0}); - auto block_shape = OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); + auto block_shape = + OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); MACE_CHECK(block_shape.size() == 2, "Block's shape should be 1D"); MACE_CHECK(crops.size() == 4, "Crops' shape should be 2D"); @@ -45,13 +46,13 @@ class BatchToSpaceNDOp : public Operator { const index_t block_dims = block_shape.size(); index_t block_shape_product = 1; for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { - MACE_CHECK(block_shape[block_dim] > 1, "block_shape's value should be great to 1"); + MACE_CHECK(block_shape[block_dim] > 1, + "block_shape's value should be great to 1"); const index_t block_shape_value = block_shape[block_dim]; - const index_t cropped_input_size = input_tensor->dim(block_dim + 1) * block_shape_value - - crops[block_dim * 2] - - crops[block_dim * 2 + 1]; - MACE_CHECK(cropped_input_size >= 0, - "cropped size must be non-negative"); + const index_t cropped_input_size = + input_tensor->dim(block_dim + 1) * block_shape_value - + crops[block_dim * 2] - crops[block_dim * 2 + 1]; + MACE_CHECK(cropped_input_size >= 0, "cropped size must be non-negative"); block_shape_product *= block_shape_value; output_shape[block_dim + 1] = cropped_input_size; } diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index bac02236..aa68adc4 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -41,7 +41,7 @@ static void BMBatchToSpace( BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BMBatchToSpace(iters, N, C, H, W, ARG); \ } \ diff --git a/mace/ops/bias_add_benchmark.cc b/mace/ops/bias_add_benchmark.cc index d59885de..8af9405b 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) { static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BiasAdd(iters, N, C, H, W); \ } \ diff --git a/mace/ops/buffer_to_image.h b/mace/ops/buffer_to_image.h index 72306338..d1d8621b 100644 --- a/mace/ops/buffer_to_image.h +++ b/mace/ops/buffer_to_image.h @@ -11,16 +11,17 @@ namespace mace { template -class BufferToImageOp: public Operator { +class BufferToImageOp : public Operator { public: BufferToImageOp(const OperatorDef &op_def, Workspace *ws) - : Operator(op_def, ws) {} + : Operator(op_def, ws) {} bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); - kernels::BufferType type = static_cast(OperatorBase::GetSingleArgument( - "buffer_type", static_cast(kernels::CONV2D_FILTER))); + kernels::BufferType type = + static_cast(OperatorBase::GetSingleArgument( + "buffer_type", static_cast(kernels::CONV2D_FILTER))); Tensor *output = this->Output(OUTPUT); functor_(const_cast(input_tensor), type, output, future); diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc index 34c7d16f..04baa382 100644 --- a/mace/ops/buffer_to_image_test.cc +++ b/mace/ops/buffer_to_image_test.cc @@ -7,8 +7,9 @@ using namespace mace; -template -void TestBidirectionTransform(const int type, const std::vector &input_shape) { +template +void TestBidirectionTransform(const int type, + const std::vector &input_shape) { OpsTestNet net; OpDefBuilder("BufferToImage", "BufferToImageTest") .Input("Input") @@ -34,7 +35,8 @@ void TestBidirectionTransform(const int type, const std::vector &input_ net.RunOp(D); // Check - ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-5); + ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), + 1e-5); } TEST(BufferToImageTest, ArgSmall) { @@ -54,51 +56,63 @@ TEST(BufferToImageTest, ArgLarge) { } TEST(BufferToImageTest, InputSmallSingleChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, {1, 2, 3, 1}); + TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + {1, 2, 3, 1}); } TEST(BufferToImageTest, InputSmallMultipleChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, {1, 2, 3, 3}); + TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + {1, 2, 3, 3}); } TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, {3, 2, 3, 3}); + TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + {3, 2, 3, 3}); } TEST(BufferToImageTest, InputMedia) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, {3, 13, 17, 128}); + TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + {3, 13, 17, 128}); } TEST(BufferToImageTest, InputLarge) { - TestBidirectionTransform(kernels::IN_OUT_CHANNEL, {3, 64, 64, 256}); + TestBidirectionTransform(kernels::IN_OUT_CHANNEL, + {3, 64, 64, 256}); } TEST(BufferToImageTest, Filter1x1Small) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {1, 1, 3, 5}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {1, 1, 3, 5}); } TEST(BufferToImageTest, Filter1x1Media) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {1, 1, 13, 17}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {1, 1, 13, 17}); } TEST(BufferToImageTest, Filter1x1Large) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {1, 1, 128, 512}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {1, 1, 128, 512}); } TEST(BufferToImageTest, Filter3x3Small) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {3, 3, 3, 5}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {3, 3, 3, 5}); } TEST(BufferToImageTest, Filter3x3Meida) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {3, 3, 13, 17}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {3, 3, 13, 17}); } TEST(BufferToImageTest, Filter3x3Large) { - TestBidirectionTransform(kernels::CONV2D_FILTER, {3, 3, 128, 256}); + TestBidirectionTransform(kernels::CONV2D_FILTER, + {3, 3, 128, 256}); } -template -void TestDiffTypeBidirectionTransform(const int type, const std::vector &input_shape) { +template +void TestDiffTypeBidirectionTransform(const int type, + const std::vector &input_shape) { OpsTestNet net; OpDefBuilder("BufferToImage", "BufferToImageTest") .Input("Input") @@ -123,14 +137,16 @@ void TestDiffTypeBidirectionTransform(const int type, const std::vector net.RunOp(D); // Check - ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-2); + ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), + 1e-2); } TEST(BufferToImageTest, ArgFloatToHalfSmall) { - TestDiffTypeBidirectionTransform(kernels::ARGUMENT, {11}); + TestDiffTypeBidirectionTransform(kernels::ARGUMENT, + {11}); } -template +template void TestStringHalfBidirectionTransform(const int type, const std::vector &input_shape, const unsigned char *input_data) { @@ -142,9 +158,10 @@ void TestStringHalfBidirectionTransform(const int type, .AddIntArg("T", DataTypeToEnum::value) .Finalize(net.NewOperatorDef()); - const half *h_data = reinterpret_cast(input_data); + const half *h_data = reinterpret_cast(input_data); - net.AddInputFromArray("Input", input_shape, std::vector(h_data, h_data+2)); + net.AddInputFromArray("Input", input_shape, + std::vector(h_data, h_data + 2)); // Run net.RunOp(D); @@ -160,12 +177,14 @@ void TestStringHalfBidirectionTransform(const int type, net.RunOp(D); // Check - ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-2); + ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), + 1e-2); } TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { - const unsigned char input_data[] = {0xCD, 0x3C, 0x33, 0x40,}; - TestStringHalfBidirectionTransform(kernels::ARGUMENT, - {2}, - input_data); + const unsigned char input_data[] = { + 0xCD, 0x3C, 0x33, 0x40, + }; + TestStringHalfBidirectionTransform( + kernels::ARGUMENT, {2}, input_data); } diff --git a/mace/ops/channel_shuffle.h b/mace/ops/channel_shuffle.h index 9f6b19be..b87d6263 100644 --- a/mace/ops/channel_shuffle.h +++ b/mace/ops/channel_shuffle.h @@ -28,8 +28,8 @@ class ChannelShuffleOp : public Operator { input->shape()[1]); output->ResizeLike(input); - functor_(input->data(), input->shape().data(), - output->mutable_data(), future); + functor_(input->data(), input->shape().data(), output->mutable_data(), + future); return true; } diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index a984b39d..bf64eda2 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -41,7 +41,7 @@ static void ChannelShuffle( static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(float))); \ ChannelShuffle(iters, N, C, H, W, G); \ } \ diff --git a/mace/ops/concat.h b/mace/ops/concat.h index 4577dc8b..cadd5293 100644 --- a/mace/ops/concat.h +++ b/mace/ops/concat.h @@ -14,10 +14,11 @@ class ConcatOp : public Operator { public: ConcatOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), - functor_(OperatorBase::GetSingleArgument("axis", 3)){} + functor_(OperatorBase::GetSingleArgument("axis", 3)) {} bool Run(StatsFuture *future) override { - MACE_CHECK(this->InputSize() >= 2) << "There must be at least two inputs to concat"; + MACE_CHECK(this->InputSize() >= 2) + << "There must be at least two inputs to concat"; const std::vector input_list = this->Inputs(); const int32_t concat_axis = OperatorBase::GetSingleArgument("axis", 3); const int32_t input_dims = input_list[0]->dim_size(); diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index cc447b83..bbbbc126 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -37,11 +37,10 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) { } } -#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \ - static void BM_CONCAT_CPU_##DIM0##_##DIM1( \ - int iters) { \ +#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \ + static void BM_CONCAT_CPU_##DIM0##_##DIM1(int iters) { \ ConcatHelper(iters, DIM0, DIM1); \ - } \ + } \ BENCHMARK(BM_CONCAT_CPU_##DIM0##_##DIM1) BM_CONCAT_CPU_MACRO(0, 1000); @@ -90,13 +89,11 @@ static void OpenclConcatHelper(int iters, } } - -#define BM_CONCAT_OPENCL_MACRO(N, H, W, C, TYPE) \ - static void BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE( \ - int iters) { \ - std::vector shape = {N, H, W, C}; \ - OpenclConcatHelper(iters, shape, shape, 3); \ - } \ +#define BM_CONCAT_OPENCL_MACRO(N, H, W, C, TYPE) \ + static void BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE(int iters) { \ + std::vector shape = {N, H, W, C}; \ + OpenclConcatHelper(iters, shape, shape, 3); \ + } \ BENCHMARK(BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE) BM_CONCAT_OPENCL_MACRO(3, 32, 32, 32, float); diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index a49e593c..2e061ad4 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -112,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) { concat_axis_size += input_shapes[i][axis]; GenerateRandomRealTypeData(input_shapes[i], inputs[i]); input_ptrs[i] = inputs[i].data(); - net.AddInputFromArray( - MakeString("Input", i), input_shapes[i], inputs[i]); + net.AddInputFromArray(MakeString("Input", i), + input_shapes[i], inputs[i]); } // Run @@ -214,6 +214,6 @@ TEST_F(ConcatOpTest, OPENCLUnAligned) { } TEST_F(ConcatOpTest, OPENCLAlignedMultiInput) { - OpenclRandomTest({{3, 32, 32, 32}, {3, 32, 32, 32}, - {3, 32, 32, 32}, {3, 32, 32, 32}}, 3); + OpenclRandomTest( + {{3, 32, 32, 32}, {3, 32, 32, 32}, {3, 32, 32, 32}, {3, 32, 32, 32}}, 3); } \ No newline at end of file diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 184772c4..086f7328 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include "mace/ops/conv_2d.h" +#include #include "mace/ops/ops_test_util.h" using namespace mace; @@ -342,7 +342,8 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } template -static void TestComplexConvNxNS12(const std::vector &shape, const int stride) { +static void TestComplexConvNxNS12(const std::vector &shape, + const int stride) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { @@ -412,27 +413,21 @@ static void TestComplexConvNxNS12(const std::vector &shape, const int s } TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) { - TestComplexConvNxNS12({32, 16, 16, 32}, - 1); - TestComplexConvNxNS12({32, 16, 16, 32}, - 2); + TestComplexConvNxNS12({32, 16, 16, 32}, 1); + TestComplexConvNxNS12({32, 16, 16, 32}, 2); } TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) { - TestComplexConvNxNS12({17, 113, 5, 7}, - 1); - TestComplexConvNxNS12({17, 113, 5, 7}, - 2); + TestComplexConvNxNS12({17, 113, 5, 7}, 1); + TestComplexConvNxNS12({17, 113, 5, 7}, 2); } TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) { - TestComplexConvNxNS12({31, 113, 13, 17}, - 3); - TestComplexConvNxNS12({32, 32, 13, 17}, - 4); + TestComplexConvNxNS12({31, 113, 13, 17}, 3); + TestComplexConvNxNS12({32, 32, 13, 17}, 4); } -template +template static void TestHalfComplexConvNxNS12(const std::vector &input_shape, const std::vector &filter_shape, const std::vector &dilations) { @@ -519,67 +514,58 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) { - TestHalfComplexConvNxNS12({32, 32}, - {1, 1, 32, 64}, + TestHalfComplexConvNxNS12({32, 32}, {1, 1, 32, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) { - TestHalfComplexConvNxNS12({32, 32}, - {3, 3, 32, 64}, + TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) { - TestHalfComplexConvNxNS12({32, 32}, - {15, 1, 256, 2}, + TestHalfComplexConvNxNS12({32, 32}, {15, 1, 256, 2}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) { - TestHalfComplexConvNxNS12({32, 32}, - {1, 15, 256, 2}, + TestHalfComplexConvNxNS12({32, 32}, {1, 15, 256, 2}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x75S12) { - TestHalfComplexConvNxNS12({32, 32}, - {7, 7, 3, 64}, + TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) { - TestHalfComplexConvNxNS12({107, 113}, - {1, 1, 5, 7}, + TestHalfComplexConvNxNS12({107, 113}, {1, 1, 5, 7}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) { - TestHalfComplexConvNxNS12({107, 113}, - {3, 3, 5, 7}, + TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) { - TestHalfComplexConvNxNS12({64, 64}, - {5, 5, 16, 16}, + TestHalfComplexConvNxNS12({64, 64}, {5, 5, 16, 16}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) { - TestHalfComplexConvNxNS12({64, 64}, - {7, 7, 16, 16}, + TestHalfComplexConvNxNS12({64, 64}, {7, 7, 16, 16}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) { - TestHalfComplexConvNxNS12({63, 67}, - {7, 7, 16, 16}, + TestHalfComplexConvNxNS12({63, 67}, {7, 7, 16, 16}, {4, 4}); } -template -static void TestDilationConvNxN(const std::vector &shape, const int dilation_rate) { +template +static void TestDilationConvNxN(const std::vector &shape, + const int dilation_rate) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { @@ -617,9 +603,12 @@ static void TestDilationConvNxN(const std::vector &shape, const int dil expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::CONV2D_FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -634,7 +623,8 @@ static void TestDilationConvNxN(const std::vector &shape, const int dil // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -647,22 +637,20 @@ static void TestDilationConvNxN(const std::vector &shape, const int dil } TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) { - TestDilationConvNxN({32, 32, 32, 64}, - 2); + TestDilationConvNxN({32, 32, 32, 64}, 2); } TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) { - TestDilationConvNxN({128, 128, 16, 16}, - 4); + TestDilationConvNxN({128, 128, 16, 16}, 4); } TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) { - TestDilationConvNxN({107, 113, 5, 7}, - 4); + TestDilationConvNxN({107, 113, 5, 7}, 4); } -template -static void TestArbitraryPadConvNxN(const std::vector &shape, const std::vector &paddings) { +template +static void TestArbitraryPadConvNxN(const std::vector &shape, + const std::vector &paddings) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w) { srand(time(NULL)); @@ -698,9 +686,12 @@ static void TestArbitraryPadConvNxN(const std::vector &shape, const std expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::CONV2D_FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -714,7 +705,8 @@ static void TestArbitraryPadConvNxN(const std::vector &shape, const std // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -726,8 +718,7 @@ static void TestArbitraryPadConvNxN(const std::vector &shape, const std } TEST_F(Conv2dOpTest, OPENCLAlignedPad1) { - TestArbitraryPadConvNxN({32, 32, 32, 64}, - {1, 1}); + TestArbitraryPadConvNxN({32, 32, 32, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { @@ -736,6 +727,5 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { } TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) { - TestArbitraryPadConvNxN({107, 113, 5, 7}, - {4, 4}); + TestArbitraryPadConvNxN({107, 113, 5, 7}, {4, 4}); } diff --git a/mace/ops/eltwise.h b/mace/ops/eltwise.h index 5bbd9441..621a8f2b 100644 --- a/mace/ops/eltwise.h +++ b/mace/ops/eltwise.h @@ -18,15 +18,17 @@ class EltwiseOp : public Operator { functor_(static_cast( OperatorBase::GetSingleArgument( "type", static_cast(kernels::EltwiseType::SUM))), - OperatorBase::GetRepeatedArgument("coeff")){} + OperatorBase::GetRepeatedArgument("coeff")) {} bool Run(StatsFuture *future) override { const Tensor *input0 = this->Input(0); const Tensor *input1 = this->Input(1); Tensor *output = this->Output(OUTPUT); - MACE_CHECK(input0->dim_size() == input1->dim_size()) << "Inputs of Eltwise op must be same shape"; - for(int i = 0; i < input0->dim_size(); ++i) { - MACE_CHECK(input0->dim(i) == input1->dim(i)) << "Inputs of Eltwise op must be same shape"; + MACE_CHECK(input0->dim_size() == input1->dim_size()) + << "Inputs of Eltwise op must be same shape"; + for (int i = 0; i < input0->dim_size(); ++i) { + MACE_CHECK(input0->dim(i) == input1->dim(i)) + << "Inputs of Eltwise op must be same shape"; } output->ResizeLike(input0); diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index c2f48643..8dcb243a 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -61,7 +61,7 @@ static void EltwiseBenchmark( BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * H * W * C; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ EltwiseBenchmark( \ iters, static_cast(ELT_TYPE), N, H, W, C); \ diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index 3e3d3362..ae8cf5f0 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -2,15 +2,15 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include "mace/kernels/eltwise.h" #include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" -#include "mace/kernels/eltwise.h" namespace mace { class EltwiseOpTest : public OpsTestBase {}; -template +template void Simple(const kernels::EltwiseType type, const std::vector &shape, const std::vector &input0, @@ -36,8 +36,10 @@ void Simple(const kernels::EltwiseType type, // Run net.RunOp(D); } else { - BufferToImage(net, "Input1", "InputImg1", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Input2", "InputImg2", kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input1", "InputImg1", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input2", "InputImg2", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg1") .Input("InputImg2") @@ -49,7 +51,8 @@ void Simple(const kernels::EltwiseType type, // Run net.RunOp(D); - ImageToBuffer(net, "OutputImg", "Output", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImg", "Output", + kernels::BufferType::IN_OUT_CHANNEL); } auto expected = CreateTensor(shape, output); @@ -58,64 +61,42 @@ void Simple(const kernels::EltwiseType type, } TEST_F(EltwiseOpTest, CPUSimple) { - Simple(kernels::EltwiseType::PROD, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, + Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); - Simple(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, + Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); - Simple(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, - {3, 6, 9, 12, 15, 18}, - {2, 1}); - Simple(kernels::EltwiseType::MAX, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 1, 3, 3, 6, 6}, + Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, + {3, 6, 9, 12, 15, 18}, {2, 1}); + Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, {1, 2, 3, 4, 6, 6}); - Simple(kernels::EltwiseType::MIN, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 1, 3, 3, 6, 6}, + Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, {1, 1, 3, 3, 5, 6}); } TEST_F(EltwiseOpTest, GPUSimple) { - Simple(kernels::EltwiseType::PROD, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, + Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); - Simple(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, + Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); - Simple(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6}, - {3, 6, 9, 12, 15, 18}, - {2, 1}); - Simple(kernels::EltwiseType::MAX, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 1, 3, 3, 6, 6}, + Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, + {3, 6, 9, 12, 15, 18}, {2, 1}); + Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, {1, 2, 3, 4, 6, 6}); - Simple(kernels::EltwiseType::MIN, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {1, 1, 3, 3, 6, 6}, + Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, {1, 1, 3, 3, 5, 6}); } -template +template void RandomTest(const kernels::EltwiseType type, const std::vector &shape) { testing::internal::LogToStderr(); @@ -139,8 +120,10 @@ void RandomTest(const kernels::EltwiseType type, // Run net.RunOp(); - BufferToImage(net, "Input1", "InputImg1", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Input2", "InputImg2", kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input1", "InputImg1", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input2", "InputImg2", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg1") .Input("InputImg2") @@ -153,12 +136,15 @@ void RandomTest(const kernels::EltwiseType type, // Run net.RunOp(D); - ImageToBuffer(net, "OutputImg", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImg", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { - ExpectTensorNear(*net.GetTensor("Output"), *net.GetOutput("OPENCLOutput"), 1e-3); + ExpectTensorNear(*net.GetTensor("Output"), + *net.GetOutput("OPENCLOutput"), 1e-3); } else { - ExpectTensorNear(*net.GetTensor("Output"), *net.GetOutput("OPENCLOutput"), 1e-1); + ExpectTensorNear(*net.GetTensor("Output"), + *net.GetOutput("OPENCLOutput"), 1e-1); } } diff --git a/mace/ops/folded_batch_norm.cc b/mace/ops/folded_batch_norm.cc index 8658d577..5847ab94 100644 --- a/mace/ops/folded_batch_norm.cc +++ b/mace/ops/folded_batch_norm.cc @@ -7,25 +7,22 @@ namespace mace { void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { - REGISTER_OPERATOR(op_registry, - OpKeyBuilder("FoldedBatchNorm") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), FoldedBatchNormOp); - REGISTER_OPERATOR(op_registry, - OpKeyBuilder("FoldedBatchNorm") - .Device(DeviceType::OPENCL) - .TypeConstraint("T") - .Build(), + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), FoldedBatchNormOp); - REGISTER_OPERATOR(op_registry, - OpKeyBuilder("FoldedBatchNorm") - .Device(DeviceType::OPENCL) - .TypeConstraint("T") - .Build(), + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), FoldedBatchNormOp); } diff --git a/mace/ops/folded_batch_norm_test.cc b/mace/ops/folded_batch_norm_test.cc index 45bd6736..77bf351d 100644 --- a/mace/ops/folded_batch_norm_test.cc +++ b/mace/ops/folded_batch_norm_test.cc @@ -17,7 +17,7 @@ void CalculateScaleOffset(const std::vector &gamma, std::vector &scale, std::vector &offset) { size_t size = gamma.size(); - for (int i = 0 ; i < size; ++i) { + for (int i = 0; i < size; ++i) { scale[i] = gamma[i] / std::sqrt(var[i] + epsilon); offset[i] = offset[i] - mean[i] * scale[i]; } diff --git a/mace/ops/fully_connected.h b/mace/ops/fully_connected.h index c65947af..2f915149 100644 --- a/mace/ops/fully_connected.h +++ b/mace/ops/fully_connected.h @@ -15,11 +15,10 @@ class FullyConnectedOp : public Operator { public: FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), - functor_( - kernels::StringToActivationType( - OperatorBase::GetSingleArgument("activation", - "NOOP")), - OperatorBase::GetSingleArgument("max_limit", 0.0f)) {} + functor_(kernels::StringToActivationType( + OperatorBase::GetSingleArgument("activation", + "NOOP")), + OperatorBase::GetSingleArgument("max_limit", 0.0f)) {} bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); diff --git a/mace/ops/fully_connected_benchmark.cc b/mace/ops/fully_connected_benchmark.cc index 04776899..9ada2c54 100644 --- a/mace/ops/fully_connected_benchmark.cc +++ b/mace/ops/fully_connected_benchmark.cc @@ -17,16 +17,17 @@ static void FCBenchmark( // Add input data net.AddRandomInput("Input", {batch, height, width, channel}); - net.AddRandomInput("Weight", {out_channel, height * width * channel}); + net.AddRandomInput("Weight", + {out_channel, height * width * channel}); net.AddRandomInput("Bias", {out_channel}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(net, "Weight", "WeightImage", - kernels::BufferType::WEIGHT_HEIGHT); + kernels::BufferType::WEIGHT_HEIGHT); BufferToImage(net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("FC", "FullyConnectedTest") .Input("InputImage") @@ -57,14 +58,17 @@ static void FCBenchmark( net.Sync(); } -#define BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \ - static void BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE(int iters) { \ - const int64_t macc = static_cast(iters) * N * C * H * W * OC + OC; \ - const int64_t tot = static_cast(iters) * (N + OC) * C * H * W + OC; \ - mace::testing::MaccProcessed(macc); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - FCBenchmark(iters, N, H, W, C, OC); \ - } \ +#define BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \ + static void BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t macc = \ + static_cast(iters) * N * C * H * W * OC + OC; \ + const int64_t tot = \ + static_cast(iters) * (N + OC) * C * H * W + OC; \ + mace::testing::MaccProcessed(macc); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + FCBenchmark(iters, N, H, W, C, OC); \ + } \ BENCHMARK(BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE) #define BM_FC(N, H, W, C, OC) \ diff --git a/mace/ops/fully_connected_test.cc b/mace/ops/fully_connected_test.cc index a945f41a..3a41dd87 100644 --- a/mace/ops/fully_connected_test.cc +++ b/mace/ops/fully_connected_test.cc @@ -10,7 +10,7 @@ namespace mace { class FullyConnectedOpTest : public OpsTestBase {}; -template +template void Simple(const std::vector &input_shape, const std::vector &input_value, const std::vector &weight_shape, @@ -58,83 +58,52 @@ void Simple(const std::vector &input_shape, } // Check - auto expected = - CreateTensor(output_shape, output_value); + auto expected = CreateTensor(output_shape, output_value); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } TEST_F(FullyConnectedOpTest, SimpleCPU) { - Simple({1, 2, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1, 8}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1}, {2}, - {1, 1, 1, 1}, {206}); - Simple({1, 1, 2, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {2, 10}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, - {2}, {2, 3}, - {1, 1, 1, 2}, {387, 3853}); - Simple({1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {5, 6}, - {1, 2, 3, 4, 5, 6, - 10, 20, 30, 40, 50, 60, - 1, 2, 3, 4, 5, 6, - 10, 20, 30, 40, 50, 60, - 1, 2, 3, 4, 5, 6}, - {5}, {1, 2, 3, 4, 5}, - {1, 1, 1, 5}, {92, 912, 94, 914, 96}); + Simple({1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 8}, + {1, 2, 3, 4, 5, 6, 7, 8}, {1}, {2}, {1, 1, 1, 1}, + {206}); + Simple( + {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 10}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, + {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); + Simple( + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 6}, + {1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, + 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, 4, 5, 6}, + {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); } TEST_F(FullyConnectedOpTest, SimpleCPUWithBatch) { - Simple({2, 1, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1, 4}, - {1, 2, 3, 4}, - {1}, {2}, - {2, 1, 1, 1}, {32, 72}); + Simple({2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 4}, + {1, 2, 3, 4}, {1}, {2}, {2, 1, 1, 1}, {32, 72}); } TEST_F(FullyConnectedOpTest, SimpleOPENCL) { - Simple({1, 2, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1, 8}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1}, {2}, - {1, 1, 1, 1}, {206}); - Simple({1, 1, 2, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {2, 10}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, - {2}, {2, 3}, - {1, 1, 1, 2}, {387, 3853}); - Simple({1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - {5, 6}, - {1, 2, 3, 4, 5, 6, - 10, 20, 30, 40, 50, 60, - 1, 2, 3, 4, 5, 6, - 10, 20, 30, 40, 50, 60, - 1, 2, 3, 4, 5, 6}, - {5}, {1, 2, 3, 4, 5}, - {1, 1, 1, 5}, {92, 912, 94, 914, 96}); + Simple({1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 8}, + {1, 2, 3, 4, 5, 6, 7, 8}, {1}, {2}, {1, 1, 1, 1}, + {206}); + Simple( + {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 10}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, + {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); + Simple( + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 6}, + {1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, + 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, 4, 5, 6}, + {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); } TEST_F(FullyConnectedOpTest, SimpleGPUWithBatch) { - Simple({2, 1, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8}, - {1, 4}, - {1, 2, 3, 4}, - {1}, {2}, - {2, 1, 1, 1}, {32, 72}); + Simple({2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 4}, + {1, 2, 3, 4}, {1}, {2}, {2, 1, 1, 1}, {32, 72}); } -template +template void Complex(const index_t batch, const index_t height, const index_t width, @@ -156,8 +125,7 @@ void Complex(const index_t batch, "Input", {batch, height, width, channels}); net.AddRandomInput( "Weight", {out_channel, height * width * channels}); - net.AddRandomInput( - "Bias", {out_channel}); + net.AddRandomInput("Bias", {out_channel}); // run cpu net.RunOp(); @@ -215,6 +183,4 @@ TEST_F(FullyConnectedOpTest, OPENCLHalfUnAlignedWithBatch) { Complex(16, 13, 12, 31, 113); Complex(31, 21, 11, 23, 103); } - } - diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index 37a056f1..ad64be0d 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -511,8 +511,9 @@ TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) { TestGeneralConvNxNS12({40, 40}, {15, 1, 32, 64}); } -template -static void TestAtrousConvNxN(const std::vector &shape, const int dilation) { +template +static void TestAtrousConvNxN(const std::vector &shape, + const int dilation) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { @@ -550,9 +551,12 @@ static void TestAtrousConvNxN(const std::vector &shape, const int dilat expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::CONV2D_FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -567,7 +571,8 @@ static void TestAtrousConvNxN(const std::vector &shape, const int dilat // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -591,7 +596,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedAtrousConvNxN) { TestAtrousConvNxN({107, 113, 5, 7}, 2); } -template +template static void TestGeneralHalfAtrousConv(const std::vector &image_shape, const std::vector &filter_shape, const std::vector &dilations) { @@ -620,7 +625,8 @@ static void TestGeneralHalfAtrousConv(const std::vector &image_shape, .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, input_channels}); + net.AddRandomInput("Input", + {batch, height, width, input_channels}); net.AddRandomInput( "Filter", {kernel_h, kernel_w, output_channels, input_channels}); net.AddRandomInput("Bias", {output_channels}); @@ -632,9 +638,12 @@ static void TestGeneralHalfAtrousConv(const std::vector &image_shape, expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::CONV2D_FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -649,7 +658,8 @@ static void TestGeneralHalfAtrousConv(const std::vector &image_shape, // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.7); }; @@ -658,13 +668,11 @@ static void TestGeneralHalfAtrousConv(const std::vector &image_shape, } TEST_F(FusedConv2dOpTest, OPENCL7X7AtrousConvD2) { - TestGeneralHalfAtrousConv({32, 32}, - {7, 7, 3, 16}, + TestGeneralHalfAtrousConv({32, 32}, {7, 7, 3, 16}, {2, 2}); } TEST_F(FusedConv2dOpTest, OPENCL15X15AtrousConvD4) { - TestGeneralHalfAtrousConv({63, 71}, - {15, 15, 16, 16}, + TestGeneralHalfAtrousConv({63, 71}, {15, 15, 16, 16}, {2, 2}); } diff --git a/mace/ops/global_avg_pooling.h b/mace/ops/global_avg_pooling.h index 55deb2a9..dc1cda9e 100644 --- a/mace/ops/global_avg_pooling.h +++ b/mace/ops/global_avg_pooling.h @@ -1,4 +1,4 @@ -//DMACE_ENABLE_NEON +// DMACE_ENABLE_NEON // Copyright (c) 2017 XiaoMi All rights reserved. // diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc index 00b5471a..1ea07c4b 100644 --- a/mace/ops/global_avg_pooling_benchmark.cc +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -40,13 +40,13 @@ static void GlobalAvgPooling( static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(float))); \ GlobalAvgPooling(iters, N, C, H, W); \ } \ BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE) -#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \ +#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \ BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, CPU); // BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, NEON); diff --git a/mace/ops/image_to_buffer.h b/mace/ops/image_to_buffer.h index ab4cc5ed..22169b4e 100644 --- a/mace/ops/image_to_buffer.h +++ b/mace/ops/image_to_buffer.h @@ -11,17 +11,18 @@ namespace mace { template -class ImageToBufferOp: public Operator { +class ImageToBufferOp : public Operator { public: ImageToBufferOp(const OperatorDef &op_def, Workspace *ws) - : Operator(op_def, ws), functor_(true) {} + : Operator(op_def, ws), functor_(true) {} bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); - kernels::BufferType type = static_cast(OperatorBase::GetSingleArgument( - "buffer_type", static_cast(kernels::CONV2D_FILTER))); + kernels::BufferType type = + static_cast(OperatorBase::GetSingleArgument( + "buffer_type", static_cast(kernels::CONV2D_FILTER))); functor_(output, type, const_cast(input_tensor), future); return true; } diff --git a/mace/ops/matmul.h b/mace/ops/matmul.h index 6cfdfe99..b45ae35a 100644 --- a/mace/ops/matmul.h +++ b/mace/ops/matmul.h @@ -24,8 +24,8 @@ class MatMulOp : public Operator { << "The dimension of A and B should be 4"; MACE_CHECK(A->dim(0) == B->dim(0)) << "A and B must have same batch size"; MACE_CHECK(A->dim(2) == B->dim(1)) - << "the number of A's column " << A->dim(2) - << " must be equal to B's row " << B->dim(1); + << "the number of A's column " << A->dim(2) + << " must be equal to B's row " << B->dim(1); functor_(A, B, C, future); return true; diff --git a/mace/ops/matmul_test.cc b/mace/ops/matmul_test.cc index b6c801df..d8b80ead 100644 --- a/mace/ops/matmul_test.cc +++ b/mace/ops/matmul_test.cc @@ -10,7 +10,7 @@ namespace mace { class MatMulOpTest : public OpsTestBase {}; -template +template void Simple(const std::vector &A_shape, const std::vector &A_value, const std::vector &B_shape, @@ -51,29 +51,24 @@ void Simple(const std::vector &A_shape, } // Check - auto expected = - CreateTensor(C_shape, C_value); + auto expected = CreateTensor(C_shape, C_value); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } TEST_F(MatMulOpTest, SimpleCPU) { - Simple({1, 2, 3, 1}, {1, 2, 3, 4, 5, 6}, - {1, 3, 2, 1}, {1, 2, 3, 4, 5, 6}, - {1, 2, 2, 1}, {22, 28, 49, 64}); - Simple({1, 5, 5, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, - {1, 5, 5, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, - {1, 5, 5, 1}, - {215, 230, 245, 260, 275, 490, 530, 570, 610, 650, - 765, 830, 895, 960, 1025, 1040, 1130, 1220, 1310, 1400, - 1315, 1430, 1545, 1660, 1775}); + Simple({1, 2, 3, 1}, {1, 2, 3, 4, 5, 6}, {1, 3, 2, 1}, + {1, 2, 3, 4, 5, 6}, {1, 2, 2, 1}, {22, 28, 49, 64}); + Simple( + {1, 5, 5, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, + {1, 5, 5, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, + {1, 5, 5, 1}, {215, 230, 245, 260, 275, 490, 530, 570, 610, + 650, 765, 830, 895, 960, 1025, 1040, 1130, 1220, + 1310, 1400, 1315, 1430, 1545, 1660, 1775}); } - TEST_F(MatMulOpTest, SimpleCPUWithBatch) { Simple({2, 2, 3, 1}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}, {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}, @@ -81,19 +76,17 @@ TEST_F(MatMulOpTest, SimpleCPUWithBatch) { } TEST_F(MatMulOpTest, SimpleOPENCL) { - Simple({1, 2, 3, 1}, {1, 2, 3, 4, 5, 6}, - {1, 3, 2, 1}, {1, 2, 3, 4, 5, 6}, - {1, 2, 2, 1}, {22, 28, 49, 64}); - Simple({1, 5, 5, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, - {1, 5, 5, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, - {1, 5, 5, 1}, - {215, 230, 245, 260, 275, 490, 530, 570, 610, 650, - 765, 830, 895, 960, 1025, 1040, 1130, 1220, 1310, 1400, - 1315, 1430, 1545, 1660, 1775}); + Simple({1, 2, 3, 1}, {1, 2, 3, 4, 5, 6}, {1, 3, 2, 1}, + {1, 2, 3, 4, 5, 6}, {1, 2, 2, 1}, + {22, 28, 49, 64}); + Simple( + {1, 5, 5, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, + {1, 5, 5, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, + {1, 5, 5, 1}, {215, 230, 245, 260, 275, 490, 530, 570, 610, + 650, 765, 830, 895, 960, 1025, 1040, 1130, 1220, + 1310, 1400, 1315, 1430, 1545, 1660, 1775}); } TEST_F(MatMulOpTest, SimpleGPUWithBatch) { @@ -118,8 +111,8 @@ void Complex(const index_t batch, .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput( - "A", {batch, height, channels, 1}); + net.AddRandomInput("A", + {batch, height, channels, 1}); net.AddRandomInput( "B", {batch, channels, out_width, 1}); @@ -132,9 +125,9 @@ void Complex(const index_t batch, // Run on opencl BufferToImage(net, "A", "AImage", - kernels::BufferType::IN_OUT_WIDTH); + kernels::BufferType::IN_OUT_WIDTH); BufferToImage(net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + kernels::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulTest") .Input("AImage") @@ -177,5 +170,4 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) { Complex(16, 32, 64, 64); Complex(31, 31, 61, 67); } - } diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 15aa3bc9..50c2f2ca 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -95,7 +95,7 @@ class OpDefBuilder { class OpsTestNet { public: - OpsTestNet() : op_registry_(new OperatorRegistry()) {}; + OpsTestNet() : op_registry_(new OperatorRegistry()){}; template void AddInputFromArray(const std::string &name, @@ -239,7 +239,7 @@ void GenerateRandomIntTypeData(const std::vector &shape, template std::unique_ptr CreateTensor(const std::vector &shape, - const std::vector &data) { + const std::vector &data) { std::unique_ptr res( new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); res->Resize(shape); @@ -334,9 +334,8 @@ struct Expector { for (int h = 0; h < x.dim(1); ++h) { for (int w = 0; w < x.dim(2); ++w) { for (int c = 0; c < x.dim(3); ++c) { - EXPECT_NEAR(*a, *b, abs_err) << "with index = [" - << n << ", " << h << ", " - << w << ", " << c << "]"; + EXPECT_NEAR(*a, *b, abs_err) << "with index = [" << n << ", " << h + << ", " << w << ", " << c << "]"; a++; b++; } diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h index 2e4aed62..b88093ab 100644 --- a/mace/ops/pooling.h +++ b/mace/ops/pooling.h @@ -20,8 +20,12 @@ class PoolingOp : public ConvPool2dOpBase { pooling_type_( static_cast(OperatorBase::GetSingleArgument( "pooling_type", static_cast(AVG)))), - functor_(pooling_type_, kernels_.data(), this->strides_.data(), - this->padding_type_, this->paddings_, this->dilations_.data()){}; + functor_(pooling_type_, + kernels_.data(), + this->strides_.data(), + this->padding_type_, + this->paddings_, + this->dilations_.data()){}; bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index fd673d42..dae7e1af 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -54,7 +54,7 @@ static void Pooling(int iters, BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(float))); \ Pooling(iters, N, C, H, W, KE, STRIDE, Padding::PA, \ PoolingType::PO); \ @@ -62,7 +62,7 @@ static void Pooling(int iters, BENCHMARK( \ BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE) -#define BM_POOLING(N, C, H, W, K, S, PA, PO) \ +#define BM_POOLING(N, C, H, W, K, S, PA, PO) \ BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); // BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, NEON); diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index bf4cff8b..8bababc8 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -198,7 +198,8 @@ static void MaxPooling3S2(const std::vector &input_shape, Tensor expected; expected.Copy(*net.GetOutput("Output")); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -333,7 +334,8 @@ static void AvgPoolingTest(const std::vector &shape, Tensor expected; expected.Copy(*net.GetOutput("Output")); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") diff --git a/mace/ops/reshape.h b/mace/ops/reshape.h index 46c8c875..a4aec715 100644 --- a/mace/ops/reshape.h +++ b/mace/ops/reshape.h @@ -15,7 +15,7 @@ class ReshapeOp : public Operator { public: ReshapeOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), - shape_(OperatorBase::GetRepeatedArgument("shape")){} + shape_(OperatorBase::GetRepeatedArgument("shape")) {} bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); @@ -38,9 +38,11 @@ class ReshapeOp : public Operator { } if (unknown_idx != -1) { - MACE_CHECK(product != 0) << "Cannot infer shape if there is zero shape size."; + MACE_CHECK(product != 0) + << "Cannot infer shape if there is zero shape size."; const index_t missing = input->size() / product; - MACE_CHECK(missing * product == input->size()) << "Input size not match reshaped tensor size"; + MACE_CHECK(missing * product == input->size()) + << "Input size not match reshaped tensor size"; out_shape[unknown_idx] = missing; } diff --git a/mace/ops/reshape_test.cc b/mace/ops/reshape_test.cc index ab3c13a0..851f33cc 100644 --- a/mace/ops/reshape_test.cc +++ b/mace/ops/reshape_test.cc @@ -13,7 +13,6 @@ class ReshapeTest : public OpsTestBase {}; void TestReshape(const std::vector &org_shape, const std::vector &output_shape, const std::vector &res_shape) { - // Construct graph OpsTestNet net; OpDefBuilder("Reshape", "ReshapeTest") diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 8742f020..7b68e762 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -20,9 +20,9 @@ void Register_Softmax(OperatorRegistry *op_registry) { SoftmaxOp); REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") - .Device(DeviceType::OPENCL) - .TypeConstraint("T") - .Build(), + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), SoftmaxOp); } diff --git a/mace/ops/softmax.h b/mace/ops/softmax.h index cbce1d75..3eebabe0 100644 --- a/mace/ops/softmax.h +++ b/mace/ops/softmax.h @@ -14,11 +14,10 @@ template class SoftmaxOp : public Operator { public: SoftmaxOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws) { - } + : Operator(operator_def, ws) {} bool Run(StatsFuture *future) override { - const Tensor *logits= this->Input(LOGITS); + const Tensor *logits = this->Input(LOGITS); Tensor *output = this->Output(OUTPUT); output->ResizeLike(logits); diff --git a/mace/ops/softmax_test.cc b/mace/ops/softmax_test.cc index af8e3afc..68c4e4e6 100644 --- a/mace/ops/softmax_test.cc +++ b/mace/ops/softmax_test.cc @@ -14,7 +14,8 @@ void Simple() { // Construct graph OpsTestNet net; // Add input data - net.AddInputFromArray("Input", {1, 1, 2, 4}, {1, 1, 1, 1, 1, 2, 3, 4}); + net.AddInputFromArray("Input", {1, 1, 2, 4}, + {1, 1, 1, 1, 1, 2, 3, 4}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input", "InputImage", @@ -41,18 +42,15 @@ void Simple() { net.RunOp(D); } - auto expected = CreateTensor({1, 1, 2, 4}, {0.25, 0.25, 0.25, 0.25, - 0.0320586, 0.08714432, 0.23688282, 0.64391426}); + auto expected = CreateTensor( + {1, 1, 2, 4}, + {0.25, 0.25, 0.25, 0.25, 0.0320586, 0.08714432, 0.23688282, 0.64391426}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-7); } -TEST_F(SoftmaxOpTest, CPUSimple) { - Simple(); -} -TEST_F(SoftmaxOpTest, OPENCLSimple) { - Simple(); -} +TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } +TEST_F(SoftmaxOpTest, OPENCLSimple) { Simple(); } template void Complex(const std::vector &logits_shape) { diff --git a/mace/ops/space_to_batch.h b/mace/ops/space_to_batch.h index 787b82e6..b25c5895 100644 --- a/mace/ops/space_to_batch.h +++ b/mace/ops/space_to_batch.h @@ -12,7 +12,7 @@ namespace mace { -template +template class SpaceToBatchNDOp : public Operator { public: SpaceToBatchNDOp(const OperatorDef &op_def, Workspace *ws) @@ -28,17 +28,19 @@ class SpaceToBatchNDOp : public Operator { std::vector output_shape(4, 0); CalculateOutputShape(space_tensor, batch_tensor, output_shape.data()); - functor_(const_cast(space_tensor), output_shape, batch_tensor, future); + functor_(const_cast(space_tensor), output_shape, batch_tensor, + future); return true; } private: - inline void CalculateOutputShape(const Tensor *input_tensor, Tensor *output, index_t *output_shape) { - auto paddings = OperatorBase::GetRepeatedArgument("paddings", {0, 0, 0, 0}); - auto block_shape = OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); + auto paddings = + OperatorBase::GetRepeatedArgument("paddings", {0, 0, 0, 0}); + auto block_shape = + OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); MACE_CHECK(block_shape.size() == 2, "Block's shape should be 1D"); MACE_CHECK(paddings.size() == 4, "Paddings' shape should be 2D"); @@ -46,13 +48,14 @@ class SpaceToBatchNDOp : public Operator { const index_t block_dims = block_shape.size(); index_t block_shape_product = 1; for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { - MACE_CHECK(block_shape[block_dim] > 1, "block_shape's value should be great to 1"); + MACE_CHECK(block_shape[block_dim] > 1, + "block_shape's value should be great to 1"); const index_t block_shape_value = block_shape[block_dim]; - const index_t padded_input_size = input_tensor->dim(block_dim + 1) - + paddings[block_dim * 2] - + paddings[block_dim * 2 + 1]; - MACE_CHECK(padded_input_size % block_shape_value == 0, - "padded input ", padded_input_size, " is not divisible by block_shape"); + const index_t padded_input_size = input_tensor->dim(block_dim + 1) + + paddings[block_dim * 2] + + paddings[block_dim * 2 + 1]; + MACE_CHECK(padded_input_size % block_shape_value == 0, "padded input ", + padded_input_size, " is not divisible by block_shape"); block_shape_product *= block_shape_value; output_shape[block_dim + 1] = padded_input_size / block_shape_value; } diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index ac643f94..db72ce54 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -42,7 +42,7 @@ static void BMSpaceToBatch( BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ + mace::testing::MaccProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BMSpaceToBatch(iters, N, H, W, C, SHAPE); \ } \ diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc index c76757f9..9fc5e40b 100644 --- a/mace/ops/winograd_convolution_test.cc +++ b/mace/ops/winograd_convolution_test.cc @@ -4,8 +4,8 @@ #include #include "mace/core/operator.h" -#include "mace/ops/ops_test_util.h" #include "mace/kernels/conv_pool_2d_util.h" +#include "mace/ops/ops_test_util.h" namespace mace { @@ -21,7 +21,9 @@ void TransposeFilter(const std::vector &input, for (index_t w = 0; w < input_shape[1]; ++w) { for (index_t oc = 0; oc < input_shape[2]; ++oc) { for (index_t ic = 0; ic < input_shape[3]; ++ic) { - int offset = ((oc * input_shape[3] + ic) * input_shape[0] + h) * input_shape[1] + w; + int offset = ((oc * input_shape[3] + ic) * input_shape[0] + h) * + input_shape[1] + + w; output[offset] = *input_ptr; ++input_ptr; } @@ -30,7 +32,7 @@ void TransposeFilter(const std::vector &input, } } -template +template void WinogradConvolution(const index_t batch, const index_t height, const index_t width, @@ -53,8 +55,7 @@ void WinogradConvolution(const index_t batch, kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::CONV2D_FILTER); - BufferToImage(net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -78,8 +79,10 @@ void WinogradConvolution(const index_t batch, // transform filter std::vector wino_filter_data; TransposeFilter(filter_data, filter_shape, wino_filter_data); - net.AddInputFromArray("WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data); - BufferToImage(net, "WinoFilterData", "WinoFilter", kernels::BufferType::WINOGRAD_FILTER); + net.AddInputFromArray( + "WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data); + BufferToImage(net, "WinoFilterData", "WinoFilter", + kernels::BufferType::WINOGRAD_FILTER); // transform input OpDefBuilder("WinogradTransform", "WinogradTransformTest") @@ -126,18 +129,23 @@ void WinogradConvolution(const index_t batch, } TEST_F(WinogradConvlutionTest, AlignedConvolution) { - WinogradConvolution(1, 32, 32, 32, 16, Padding::VALID); - WinogradConvolution(1, 32, 32, 32, 16, Padding::SAME); + WinogradConvolution(1, 32, 32, 32, 16, + Padding::VALID); + WinogradConvolution(1, 32, 32, 32, 16, + Padding::SAME); } TEST_F(WinogradConvlutionTest, UnAlignedConvolution) { - WinogradConvolution(1, 61, 67, 31, 37, Padding::VALID); - WinogradConvolution(1, 61, 67, 37, 31, Padding::SAME); + WinogradConvolution(1, 61, 67, 31, 37, + Padding::VALID); + WinogradConvolution(1, 61, 67, 37, 31, + Padding::SAME); } TEST_F(WinogradConvlutionTest, BatchConvolution) { - WinogradConvolution(3, 64, 64, 32, 32, Padding::VALID); - WinogradConvolution(5, 61, 67, 37, 31, Padding::SAME); + WinogradConvolution(3, 64, 64, 32, 32, + Padding::VALID); + WinogradConvolution(5, 61, 67, 37, 31, + Padding::SAME); } - } diff --git a/mace/ops/winograd_inverse_transform.h b/mace/ops/winograd_inverse_transform.h index aef37473..4ea49289 100644 --- a/mace/ops/winograd_inverse_transform.h +++ b/mace/ops/winograd_inverse_transform.h @@ -8,12 +8,12 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/winograd_transform.h" #include "mace/kernels/activation.h" +#include "mace/kernels/winograd_transform.h" namespace mace { -template +template class WinogradInverseTransformOp : public Operator { public: WinogradInverseTransformOp(const OperatorDef &op_def, Workspace *ws) diff --git a/mace/ops/winograd_transform.h b/mace/ops/winograd_transform.h index 71d8a527..e225adc7 100644 --- a/mace/ops/winograd_transform.h +++ b/mace/ops/winograd_transform.h @@ -12,14 +12,14 @@ namespace mace { -template +template class WinogradTransformOp : public Operator { public: WinogradTransformOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), functor_(static_cast(OperatorBase::GetSingleArgument( - "padding", static_cast(VALID))), - OperatorBase::GetRepeatedArgument("padding_values")) {} + "padding", static_cast(VALID))), + OperatorBase::GetRepeatedArgument("padding_values")) {} bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); diff --git a/mace/ops/winograd_transform_benchmark.cc b/mace/ops/winograd_transform_benchmark.cc index a8c0e77b..23f7249b 100644 --- a/mace/ops/winograd_transform_benchmark.cc +++ b/mace/ops/winograd_transform_benchmark.cc @@ -16,7 +16,7 @@ static void BMWinogradTransform( net.AddRandomInput("Input", {batch, height, width, channels}); BufferToImage(net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("WinogradTransform", "WinogradTransformTest") .Input("InputImage") .Output("OutputImage") @@ -36,17 +36,15 @@ static void BMWinogradTransform( net.Sync(); } -#define BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \ - static void \ - BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMWinogradTransform(iters, N, H, W, C); \ - } \ - BENCHMARK( \ - BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) +#define BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \ + static void BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BMWinogradTransform(iters, N, H, W, C); \ + } \ + BENCHMARK(BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) #define BM_WINOGRAD_TRANSFORM(N, H, W, C) \ BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, half, OPENCL); @@ -88,16 +86,16 @@ static void BMWinogradInverseTransform( net.Sync(); } -#define BM_WINOGRAD_INVERSE_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \ - static void \ - BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMWinogradInverseTransform(iters, N, H, W, C); \ - } \ - BENCHMARK( \ +#define BM_WINOGRAD_INVERSE_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \ + static void \ + BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BMWinogradInverseTransform(iters, N, H, W, C); \ + } \ + BENCHMARK( \ BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) #define BM_WINOGRAD_INVERSE_TRANSFORM(N, H, W, C) \ diff --git a/mace/public/mace.h b/mace/public/mace.h index d5fd7a52..5d4ad299 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -6,10 +6,10 @@ #define MACE_CORE_MACE_H_ #include -#include -#include -#include #include +#include +#include +#include namespace mace { @@ -25,13 +25,11 @@ namespace mace { #define MACE_STR(x) MACE_STR_HELPER(x) // e.g. "0.5.0" or "0.6.0-alpha". -#define MACE_VERSION_STRING \ +#define MACE_VERSION_STRING \ (MACE_STR(MACE_MAJOR_VERSION) "." MACE_STR(MACE_MINOR_VERSION) "." MACE_STR( \ MACE_PATCH_VERSION) MACE_VERSION_SUFFIX) -inline const char *MaceVersion() { - return MACE_VERSION_STRING; -} +inline const char *MaceVersion() { return MACE_VERSION_STRING; } extern const char *MaceGitVersion(); @@ -43,17 +41,9 @@ extern const char *MaceGitVersion(); classname &operator=(const classname &) = delete #endif -enum NetMode { - INIT = 0, - NORMAL = 1 -}; +enum NetMode { INIT = 0, NORMAL = 1 }; -enum DeviceType { - CPU = 0, - NEON = 1, - OPENCL = 2, - HEXAGON = 3 -}; +enum DeviceType { CPU = 0, NEON = 1, OPENCL = 2, HEXAGON = 3 }; enum DataType { DT_INVALID = 0, @@ -104,6 +94,7 @@ class Argument { public: Argument(); void CopyFrom(const Argument &from); + public: const std::string &name() const; void set_name(const std::string &value); @@ -147,11 +138,13 @@ class NodeInput { NodeInput() {} NodeInput(int node_id, int output_port); void CopyFrom(const NodeInput &from); + public: int node_id() const; void set_node_id(int node_id); int output_port() const; void set_output_port(int output_port); + private: int node_id_; int output_port_; @@ -162,8 +155,10 @@ class OutputShape { OutputShape(); OutputShape(const std::vector &dims); void CopyFrom(const OutputShape &from); + public: const std::vector &dims() const; + private: std::vector dims_; }; @@ -240,10 +235,12 @@ class OperatorDef { class MemoryBlock { public: MemoryBlock(int mem_id, uint32_t x, uint32_t y); + public: int mem_id() const; uint32_t x() const; uint32_t y() const; + private: int mem_id_; uint32_t x_; @@ -255,9 +252,9 @@ class MemoryArena { const std::vector &mem_block() const; std::vector &mutable_mem_block(); int mem_block_size() const; + private: std::vector mem_block_; - }; // for hexagon mace-nnlib @@ -268,10 +265,11 @@ class InputInfo { int32_t max_byte_size() const; DataType data_type() const; const std::vector &dims() const; + private: std::string name_; int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len + int32_t max_byte_size_; // only support 32-bit len DataType data_type_; std::vector dims_; }; @@ -285,10 +283,11 @@ class OutputInfo { void set_data_type(DataType data_type); const std::vector &dims() const; void set_dims(const std::vector &dims); + private: std::string name_; int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len + int32_t max_byte_size_; // only support 32-bit len DataType data_type_; std::vector dims_; }; @@ -299,6 +298,7 @@ class NetDef { int op_size() const; const OperatorDef &op(const int idx) const; + public: const std::string &name() const; bool has_name() const; @@ -359,7 +359,6 @@ struct RunMetadata { std::vector op_stats; }; - class Workspace; class NetBase; class OperatorRegistry; @@ -374,8 +373,7 @@ struct MaceInputInfo { class MaceEngine { public: // Single input and output - explicit MaceEngine(const NetDef *net_def, - DeviceType device_type); + explicit MaceEngine(const NetDef *net_def, DeviceType device_type); // Multiple input or output explicit MaceEngine(const NetDef *net_def, DeviceType device_type, @@ -394,7 +392,7 @@ class MaceEngine { // Multiple input or output bool Run(const std::vector &input, std::map &output, - RunMetadata *run_metadata=nullptr); + RunMetadata *run_metadata = nullptr); MaceEngine(const MaceEngine &) = delete; MaceEngine &operator=(const MaceEngine &) = delete; diff --git a/mace/utils/command_line_flags.h b/mace/utils/command_line_flags.h index ce65e944..4373ceed 100644 --- a/mace/utils/command_line_flags.h +++ b/mace/utils/command_line_flags.h @@ -45,7 +45,7 @@ class Flags { // Return a usage message with command line cmdline, and the // usage_text strings in flag_list[]. static std::string Usage(const std::string &cmdline, - const std::vector &flag_list); + const std::vector &flag_list); }; } // namespace mace diff --git a/mace/utils/env_time.h b/mace/utils/env_time.h index 9f42486e..ce70a244 100644 --- a/mace/utils/env_time.h +++ b/mace/utils/env_time.h @@ -9,7 +9,6 @@ #include #include - namespace mace { inline int64_t NowMicros() { diff --git a/mace/utils/logging.h b/mace/utils/logging.h index e743e18e..22e39488 100644 --- a/mace/utils/logging.h +++ b/mace/utils/logging.h @@ -10,8 +10,8 @@ #include #include -#include "mace/utils/env_time.h" #include "mace/public/mace.h" +#include "mace/utils/env_time.h" #include "mace/utils/string_util.h" #undef ERROR diff --git a/mace/utils/string_util.h b/mace/utils/string_util.h index ac7ab4e0..aad884d3 100644 --- a/mace/utils/string_util.h +++ b/mace/utils/string_util.h @@ -27,7 +27,7 @@ inline void MakeStringInternal(std::stringstream &ss, MakeStringInternal(ss, args...); } -} // namespace +} // namespace template std::string MakeString(const Args &... args) { diff --git a/mace/utils/timer.h b/mace/utils/timer.h index ab48fb89..d6e28608 100644 --- a/mace/utils/timer.h +++ b/mace/utils/timer.h @@ -24,13 +24,9 @@ class WallClockTimer : public Timer { public: WallClockTimer() : accumulated_micros_(0) {} - void StartTiming() override { - start_micros_ = NowMicros(); - } + void StartTiming() override { start_micros_ = NowMicros(); } - void StopTiming() override { - stop_micros_ = NowMicros(); - } + void StopTiming() override { stop_micros_ = NowMicros(); } void AccumulateTiming() override { StopTiming(); @@ -43,13 +39,9 @@ class WallClockTimer : public Timer { accumulated_micros_ = 0; } - double ElapsedMicros() override { - return stop_micros_ - start_micros_; - } + double ElapsedMicros() override { return stop_micros_ - start_micros_; } - double AccumulatedMicros() override { - return accumulated_micros_; - } + double AccumulatedMicros() override { return accumulated_micros_; } private: double start_micros_; diff --git a/mace/utils/tuner_test.cc b/mace/utils/tuner_test.cc index 6cd54c9f..80cc50d0 100644 --- a/mace/utils/tuner_test.cc +++ b/mace/utils/tuner_test.cc @@ -30,20 +30,14 @@ TEST_F(TunerTest, SimpleRun) { WallClockTimer timer; std::vector default_params(1, 1); - int res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + int res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); default_params[0] = 2; - res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect + 1, res); } @@ -64,20 +58,13 @@ TEST_F(TunerTest, SimpleTune) { }; // tune WallClockTimer timer; - int res = - Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - *params_generator, - TunerFunc, - &timer); + int res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, *params_generator, TunerFunc, &timer); EXPECT_EQ(expect, res); // run - res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); } diff --git a/mace/utils/utils.h b/mace/utils/utils.h index 3f5c8ee0..0330de47 100644 --- a/mace/utils/utils.h +++ b/mace/utils/utils.h @@ -62,9 +62,9 @@ inline std::string ObfuscateSymbol(const std::string &src) { if (dest.empty()) { return dest; } - dest[0] = src[0]; // avoid invalid symbol which starts from 0-9 + dest[0] = src[0]; // avoid invalid symbol which starts from 0-9 const std::string encode_dict = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; for (size_t i = 1; i < src.size(); i++) { char ch = src[i]; int idx; diff --git a/mace/utils/utils_test.cc b/mace/utils/utils_test.cc index 6cd54c9f..80cc50d0 100644 --- a/mace/utils/utils_test.cc +++ b/mace/utils/utils_test.cc @@ -30,20 +30,14 @@ TEST_F(TunerTest, SimpleRun) { WallClockTimer timer; std::vector default_params(1, 1); - int res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + int res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); default_params[0] = 2; - res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect + 1, res); } @@ -64,20 +58,13 @@ TEST_F(TunerTest, SimpleTune) { }; // tune WallClockTimer timer; - int res = - Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - *params_generator, - TunerFunc, - &timer); + int res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, *params_generator, TunerFunc, &timer); EXPECT_EQ(expect, res); // run - res = Tuner::Get()->template TuneOrRun("SimpleRun", - default_params, - nullptr, - TunerFunc, - &timer); + res = Tuner::Get()->template TuneOrRun( + "SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); } -- GitLab