diff --git a/docs/development/adding_a_new_op.md b/docs/development/adding_a_new_op.md
index 2bf0af810845070f77ac174bcbfb7ccfc8f40113..4a631dd7ddd2df38b75a20ceec29bbb2a60e207e 100644
--- a/docs/development/adding_a_new_op.md
+++ b/docs/development/adding_a_new_op.md
@@ -19,7 +19,7 @@ Define the new Op class in `mace/ops/my_custom_op.cc`.
 The structure of Op is like the following code.
 
 ```c++
-#include "mace/core/operator.h"
+#include "mace/core/ops/operator.h"
 
 namespace mace {
 namespace ops {
@@ -39,7 +39,7 @@ class MyCustomOp : public Operation {
 };
 #endif  // MACE_ENABLE_OPENCL
 
-void RegisterMyCustomOp(OpRegistryBase *op_registry) {
+void RegisterMyCustomOp(OpRegistry *op_registry) {
   MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
                    DeviceType::CPU, float);
@@ -63,14 +63,14 @@ namespace ops {
 
 ...
 
-extern void RegisterMyCustomOp(OpRegistryBase *op_registry);
+extern void RegisterMyCustomOp(OpRegistry *op_registry);
 
 ...
 
 }  // namespace ops
 
-OpRegistry::OpRegistry() : OpRegistryBase() {
+OpRegistry::OpRegistry() {
   // Keep in lexicographical order
 
 ...
 
diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst
index 0f9d76093077835b71623edd274801f277a07ae8..dc7344bed145dd0d1c1dea4673226a15d2a1e638 100644
--- a/docs/user_guide/advanced_usage.rst
+++ b/docs/user_guide/advanced_usage.rst
@@ -557,7 +557,7 @@ which will reduce the library size significantly. the final binary just link the
 
     }  // namespace ops
 
-    OpRegistry::OpRegistry() : OpRegistryBase() {
+    OpRegistry::OpRegistry() {
      // Just leave the ops used in your models
 
     ...
 
diff --git a/docs/user_guide/advanced_usage_cmake.rst b/docs/user_guide/advanced_usage_cmake.rst
index 7be5e2f227a6950ae83bc7bb9d218cd1fcb1a87d..23631b93d3de058fab4ce04b3aa2a3fb8bae19cc 100644
--- a/docs/user_guide/advanced_usage_cmake.rst
+++ b/docs/user_guide/advanced_usage_cmake.rst
@@ -370,12 +370,13 @@ the sample code show how to calculate the Top-1 accuracy with imagenet validatio
 Reduce Library Size
 -------------------
 
-Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
-which will reduce the library size significantly. the final binary just link the registered ops' code.
+Remove the registration of the ops and delegators unused for your models in the
+``mace/ops/registry/ops_registry.cc`` and ``mace/ops/registry/op_delegators_registry.cc``,
+which will reduce the library size significantly. The final binary links only the registered ops' and delegators' code.
 
 .. code-block:: cpp
 
-    #include "mace/ops/ops_register.h"
+    #include "mace/ops/registry/registry.h"
 
     namespace mace {
     namespace ops {
@@ -386,12 +387,38 @@ which will reduce the library size significantly. the final binary just link the
 
     }  // namespace ops
 
-    OpRegistry::OpRegistry() : OpRegistryBase() {
+    void RegisterAllOps(OpRegistry *registry) {
      // Just leave the ops used in your models
 
      ...
 
-      ops::RegisterMyCustomOp(this);
+      ops::RegisterMyCustomOp(registry);
+
+      ...
+
+    }
+
+    }  // namespace mace
+
+.. code-block:: cpp
+
+    #include "mace/ops/registry/registry.h"
+
+    namespace mace {
+    namespace ops {
+    // Just leave the delegators used in your ops
+
+    ...
+
+    }  // namespace ops
+
+
+    void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
+      // Just leave the delegators used in your ops
+
+      ...
+
+      ops::RegisterMyCustomDelegator(registry);
 
      ...
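The documentation above calls `ops::RegisterMyCustomDelegator(registry)` without showing where that function comes from. Below is a minimal sketch of how such a delegator might be declared and registered with the `OpDelegatorRegistry` macros introduced later in this change; the class name `MyCustomGemm`, the reuse of the base `DelegatorParam`, and the key arguments are hypothetical placeholders rather than part of this patch.

```c++
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"

namespace mace {
namespace ops {

// Hypothetical CPU delegator; real delegators add their own compute
// entry points and usually derive a dedicated param struct from
// DelegatorParam.
class MyCustomGemm : public OpDelegator {
 public:
  explicit MyCustomGemm(const DelegatorParam &param) : OpDelegator(param) {}
};

// Expands to
//   void RegisterMyCustomGemmDelegator(OpDelegatorRegistry *registry)
// and registers the default creator under a key such as
// "MyCustomGemm_CPU_NEON_float" (MACE_CPU_IMPL_TYPE resolves to NEON or
// REF depending on whether MACE_ENABLE_NEON is defined).
MACE_REGISTER_DELEGATOR(
    registry, MyCustomGemm, DelegatorParam,
    MACE_DELEGATOR_KEY(MyCustomGemm, CPU, float, MACE_CPU_IMPL_TYPE))

}  // namespace ops
}  // namespace mace
```

The generated registration function would then be called from `RegisterAllOpDelegators` in `mace/ops/registry/op_delegators_registry.cc`, mirroring the op registration shown above.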
diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel index 971b2a271c389b11c61f37e1def1ce49b4537a2e..39fc3883d7acab01ecf26533ce4613b8b59f6208 100644 --- a/mace/core/BUILD.bazel +++ b/mace/core/BUILD.bazel @@ -26,6 +26,8 @@ cc_library( srcs = glob( [ "*.cc", + "ops/*.cc", + "registry/*.cc", "runtime/cpu/*.cc", ], exclude = [ @@ -53,6 +55,8 @@ cc_library( hdrs = glob( [ "*.h", + "ops/*.h", + "registry/*.h", "runtime/cpu/*.h", ], exclude = [ @@ -68,7 +72,7 @@ cc_library( ])) + if_hta_enabled(glob([ "runtime/hexagon/*hta*.h", ])) + if_apu_enabled(glob([ - "runtime/apu/*.h" + "runtime/apu/*.h", ])) + if_rpcmem_enabled([ "rpcmem.h", ]), diff --git a/mace/core/CMakeLists.txt b/mace/core/CMakeLists.txt index 25ab20bff9167b3936f8fb2101c3c9165016ea46..775eca5d8699a93428ad2988d7b5b420b8fc1ac4 100644 --- a/mace/core/CMakeLists.txt +++ b/mace/core/CMakeLists.txt @@ -8,9 +8,16 @@ set(CORE_SRCS net.cc net_def_adapter.cc net_optimizer.cc - op_context.cc - operator.cc + ops/op_condition_builder.cc + ops/op_condition_context.cc + ops/op_construct_context.cc + ops/op_context.cc + ops/operator.cc + ops/op_init_context.cc quantize.cc + registry/op_delegator_registry.cc + registry/op_registration_info.cc + registry/ops_registry.cc runtime_failure_mock.cc types.cc workspace.cc diff --git a/mace/core/net.cc b/mace/core/net.cc index 78d40dd7f57440055eea4c48c375071db2e6bf13..1e11654921d7dadfd30636c41c679630e95b5ea0 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "mace/core/net.h" + #include #include #include @@ -20,8 +22,9 @@ #include "mace/core/future.h" #include "mace/core/memory_optimizer.h" -#include "mace/core/net.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_init_context.h" +#include "mace/core/ops/op_context.h" +#include "mace/core/registry/ops_registry.h" #include "mace/public/mace.h" #include "mace/port/env.h" #include "mace/utils/conf_util.h" @@ -33,7 +36,7 @@ namespace mace { -SerialNet::SerialNet(const OpRegistryBase *op_registry, +SerialNet::SerialNet(const OpRegistry *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, diff --git a/mace/core/net.h b/mace/core/net.h index 18ec5134549ddf2a9fa62139034bb051e0afd64e..f761af134cea5c5124c3574601bc8a00acd817d2 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -21,13 +21,14 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" namespace mace { class RunMetadata; class Workspace; class MemoryOptimizer; +class OpRegistry; class NetBase { public: @@ -44,7 +45,7 @@ class NetBase { class SerialNet : public NetBase { public: - SerialNet(const OpRegistryBase *op_registry, + SerialNet(const OpRegistry *op_registry, const NetDef *net_def, Workspace *ws, Device *target_device, diff --git a/mace/core/net_def_adapter.cc b/mace/core/net_def_adapter.cc index 205dcdbe47374b92082a102eeef84dfe149794f3..7aa084b3ea00f0a634c475d08884ba7664382e02 100644 --- a/mace/core/net_def_adapter.cc +++ b/mace/core/net_def_adapter.cc @@ -17,7 +17,9 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/ops/op_condition_context.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #ifdef MACE_ENABLE_OPENCL #include "mace/core/runtime/opencl/opencl_util.h" @@ -82,7 +84,7 @@ void BuildTransposeOpDef( } // namespace -NetDefAdapter::NetDefAdapter(const OpRegistryBase *op_registry, 
+NetDefAdapter::NetDefAdapter(const OpRegistry *op_registry, const Workspace *ws) : op_registry_(op_registry), ws_(ws) {} diff --git a/mace/core/net_def_adapter.h b/mace/core/net_def_adapter.h index 0268329e4c7d2659492ea777a18606a82e8572bd..b285a4a5d913db3051e02fd93f8a50826e176d17 100644 --- a/mace/core/net_def_adapter.h +++ b/mace/core/net_def_adapter.h @@ -23,14 +23,17 @@ #include "mace/core/types.h" #include "mace/proto/mace.pb.h" #include "mace/port/port.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/net_optimizer.h" namespace mace { -class OpRegistryBase; -class Workspace; class Device; +class OpConditionContext; +class OperatorDef; +class OpRegistry; +class Workspace; + /////////////////////////////////////////////////////////////////////////////// /// Conventions @@ -49,7 +52,7 @@ class Device; /////////////////////////////////////////////////////////////////////////////// class NetDefAdapter { public: - NetDefAdapter(const OpRegistryBase *op_registry, + NetDefAdapter(const OpRegistry *op_registry, const Workspace *ws); // Adapt original net_def to a better net. // 1. Adapt device: choose best device for every op in the net. @@ -122,7 +125,7 @@ class NetDefAdapter { std::string DebugString(const NetDef *net_def); private: - const OpRegistryBase *op_registry_; + const OpRegistry *op_registry_; const Workspace *ws_; NetOptimizer net_optimizer_; }; diff --git a/mace/core/operator.cc b/mace/core/operator.cc deleted file mode 100644 index a266ce2b4b4335146a56d80fac61d7229737c006..0000000000000000000000000000000000000000 --- a/mace/core/operator.cc +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -#include "mace/core/operator.h" - -namespace mace { -OpConditionContext::OpConditionContext( - const Workspace *ws, - OpConditionContext::TensorShapeMap *info) - : operator_def_(nullptr), - ws_(ws), - device_(nullptr), - tensor_shape_info_(info) {} - -void OpConditionContext::set_operator_def( - const OperatorDef *operator_def) { - operator_def_ = operator_def; - input_data_types_.clear(); -} - -void OpConditionContext::SetInputInfo(size_t idx, - MemoryType mem_type, - DataType dt) { - if (input_mem_types_.empty()) { - // the default inputs' memory types are same as output memory type. - input_mem_types_.resize(operator_def_->input_size(), output_mem_type_); - } - if (input_data_types_.empty()) { - // the default inputs' data types are same as operation's data type. 
- DataType op_dt = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DataType::DT_FLOAT))); - input_data_types_.resize(operator_def_->input_size(), op_dt); - } - MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size()); - input_mem_types_[idx] = mem_type; - input_data_types_[idx] = dt; -} - -void OpConditionContext::set_output_mem_type(MemoryType type) { - MACE_CHECK(operator_def_ != nullptr); - output_mem_type_ = type; - input_mem_types_.clear(); -} - -MemoryType OpConditionContext::GetInputMemType(size_t idx) const { - if (input_mem_types_.empty()) { - return output_mem_type_; - } - MACE_CHECK(idx < input_mem_types_.size(), - idx, " < ", input_mem_types_.size()); - return input_mem_types_[idx]; -} - -DataType OpConditionContext::GetInputDataType(size_t idx) const { - if (input_data_types_.empty()) { - // the default inputs' data types are same as operation's data type. - return static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DataType::DT_FLOAT))); - } - MACE_CHECK(idx < input_data_types_.size()); - return input_data_types_[idx]; -} - -#ifdef MACE_ENABLE_OPENCL -void OpConditionContext::SetInputOpenCLBufferType( - size_t idx, OpenCLBufferType buffer_type) { - if (input_opencl_buffer_types_.empty()) { - // the default inputs' memory types are same as output memory type. - input_opencl_buffer_types_.resize(operator_def_->input_size(), - OpenCLBufferType::IN_OUT_CHANNEL); - } - MACE_CHECK(idx < input_opencl_buffer_types_.size()); - input_opencl_buffer_types_[idx] = buffer_type; -} -OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType( - size_t idx) const { - if (input_opencl_buffer_types_.empty()) { - return OpenCLBufferType::IN_OUT_CHANNEL; - } - MACE_CHECK(idx < input_opencl_buffer_types_.size()); - return input_opencl_buffer_types_[idx]; -} -#endif // MACE_ENABLE_OPENCL - -OpConstructContext::OpConstructContext(Workspace *ws) - : operator_def_(nullptr), - ws_(ws), - device_(nullptr) {} - -void OpConstructContext::set_operator_def( - std::shared_ptr operator_def) { - operator_def_ = operator_def; -} - -OpInitContext::OpInitContext(Workspace *ws, Device *device) - : ws_(ws), device_(device) {} - -Operation::Operation(OpConstructContext *context) - : operator_def_(context->operator_def()) {} - -MaceStatus Operation::Init(OpInitContext *context) { - Workspace *ws = context->workspace(); - for (const std::string &input_str : operator_def_->input()) { - const Tensor *tensor = ws->GetTensor(input_str); - MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(), - ": Encountered a non-existing input tensor: ", input_str); - inputs_.push_back(tensor); - } - for (int i = 0; i < operator_def_->output_size(); ++i) { - const std::string output_str = operator_def_->output(i); - if (ws->HasTensor(output_str)) { - outputs_.push_back(ws->GetTensor(output_str)); - } else { - MACE_CHECK( - operator_def_->output_type_size() == 0 || - operator_def_->output_size() == operator_def_->output_type_size(), - "operator output size != operator output type size", - operator_def_->output_size(), - operator_def_->output_type_size()); - DataType output_type; - if (i < operator_def_->output_type_size()) { - output_type = operator_def_->output_type(i); - } else { - output_type = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, "T", static_cast(DT_FLOAT))); - } - outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( - output_str, context->device()->allocator(), output_type))); - } - if (i < 
operator_def_->output_shape_size()) { - std::vector - shape_configured(operator_def_->output_shape(i).dims_size()); - for (size_t dim = 0; dim < shape_configured.size(); ++dim) { - shape_configured[dim] = operator_def_->output_shape(i).dims(dim); - } - ws->GetTensor(output_str)->SetShapeConfigured(shape_configured); - } - } - return MaceStatus::MACE_SUCCESS; -} - -// op registry -namespace { -class OpKeyBuilder { - public: - explicit OpKeyBuilder(const std::string &op_name); - - OpKeyBuilder &Device(DeviceType device); - - OpKeyBuilder &TypeConstraint(const char *attr_name, - DataType allowed); - - const std::string Build(); - - private: - std::string op_name_; - DeviceType device_type_; - std::map type_constraint_; -}; - -OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {} - -OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { - device_type_ = device; - return *this; -} - -OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, - DataType allowed) { - type_constraint_[attr_name] = allowed; - return *this; -} - -const std::string OpKeyBuilder::Build() { - static const std::vector type_order = {"T"}; - std::stringstream ss; - ss << op_name_; - ss << device_type_; - for (auto type : type_order) { - ss << type << "_" << DataTypeToString(type_constraint_[type]); - } - - return ss.str(); -} -} // namespace - -OpRegistrationInfo::OpRegistrationInfo() { - // default device type placer - device_placer = [this](OpConditionContext *context) -> std::set { - MACE_UNUSED(context); - return this->devices; - }; - - // default input and output memory type setter - memory_type_setter = [](OpConditionContext *context) -> void { - if (context->device()->device_type() == DeviceType::GPU) { -#ifdef MACE_ENABLE_OPENCL - if (context->device()->gpu_runtime()->UseImageMemory()) { - context->set_output_mem_type(MemoryType::GPU_IMAGE); - } else { - context->set_output_mem_type(MemoryType::GPU_BUFFER); - } -#endif // MACE_ENABLE_OPENCL - } else { - context->set_output_mem_type(MemoryType::CPU_BUFFER); - } - }; - - data_format_selector = [](OpConditionContext *context) - -> std::vector { - DataFormat op_data_format = - static_cast( - ProtoArgHelper::GetOptionalArg( - *context->operator_def(), "data_format", - static_cast(DataFormat::NONE))); - return std::vector(context->operator_def()->input_size(), - op_data_format); - }; -} - -void OpRegistrationInfo::AddDevice(DeviceType device) { - devices.insert(device); -} - -void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) { - VLOG(3) << "Registering: " << key; - MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key); - creators[key] = creator; -} - -MaceStatus OpRegistryBase::Register( - const std::string &op_type, - const DeviceType device_type, - const DataType dt, - OpRegistrationInfo::OpCreator creator) { - if (registry_.count(op_type) == 0) { - registry_[op_type] = std::unique_ptr( - new OpRegistrationInfo); - } - registry_[op_type]->AddDevice(device_type); - - std::string op_key = OpKeyBuilder(op_type) - .Device(device_type) - .TypeConstraint("T", dt) - .Build(); - registry_.at(op_type)->Register(op_key, creator); - return MaceStatus::MACE_SUCCESS; -} - -MaceStatus OpRegistryBase::Register( - const OpConditionBuilder &builder) { - std::string op_type = builder.type(); - if (registry_.count(op_type) == 0) { - registry_[op_type] = std::unique_ptr( - new OpRegistrationInfo); - } - builder.Finalize(registry_[op_type].get()); - return MaceStatus::MACE_SUCCESS; -} - -const std::set 
OpRegistryBase::AvailableDevices( - const std::string &op_type, OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - - return registry_.at(op_type)->device_placer(context); -} - -void OpRegistryBase::GetInOutMemoryTypes( - const std::string &op_type, - OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered. op_type=", op_type); - return registry_.at(op_type)->memory_type_setter(context); -} - -const std::vector OpRegistryBase::InputsDataFormat( - const std::string &op_type, - OpConditionContext *context) const { - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - return registry_.at(op_type)->data_format_selector(context); -} - -std::unique_ptr OpRegistryBase::CreateOperation( - OpConstructContext *context, - DeviceType device_type) const { - auto operator_def = context->operator_def(); - DataType dtype = static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def, "T", static_cast(DT_FLOAT))); - VLOG(1) << "Creating operator " << operator_def->name() << "(" - << operator_def->type() << "<" << dtype << ">" << ") on " - << device_type; - const std::string op_type = context->operator_def()->type(); - MACE_CHECK(registry_.count(op_type) != 0, - op_type, " operation is not registered."); - - auto key_dtype = - (device_type == DeviceType::GPU && dtype == DT_HALF) ? DT_FLOAT : dtype; - std::string key = OpKeyBuilder(op_type) - .Device(device_type) - .TypeConstraint("T", key_dtype) - .Build(); - if (registry_.at(op_type)->creators.count(key) == 0) { - LOG(FATAL) << "Key not registered: " << key - << ", op type is: " << operator_def->type(); - } - return registry_.at(op_type)->creators.at(key)(context); -} - -OpConditionBuilder::OpConditionBuilder(const std::string &type) - : type_(type) {} - -const std::string OpConditionBuilder::type() const { - return type_; -} - -OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc( - OpRegistrationInfo::DevicePlacer placer) { - placer_ = placer; - return *this; -} - -OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter( - OpRegistrationInfo::MemoryTypeSetter setter) { - memory_type_setter_ = setter; - return *this; -} - -OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector( - OpRegistrationInfo::DataFormatSelector selector) { - data_format_selector_ = selector; - return *this; -} - -void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const { - if (info != nullptr) { - if (placer_) { - info->device_placer = placer_; - } - if (memory_type_setter_) { - info->memory_type_setter = memory_type_setter_; - } - - if (data_format_selector_) { - info->data_format_selector = data_format_selector_; - } - } -} - -} // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h deleted file mode 100644 index fbcbfd2ead3f8d70552464420f450fae17b04b0a..0000000000000000000000000000000000000000 --- a/mace/core/operator.h +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_CORE_OPERATOR_H_ -#define MACE_CORE_OPERATOR_H_ - -#include -#include -#include -#include -#include - -#include "mace/core/arg_helper.h" -#include "mace/core/op_context.h" -#include "mace/core/tensor.h" -#include "mace/core/workspace.h" -#include "mace/proto/mace.pb.h" -#ifdef MACE_ENABLE_OPENCL -#include "mace/core/runtime/opencl/opencl_util.h" -#endif // MACE_ENABLE_OPENCL - -namespace mace { - -// OpConditionContext has all information used for choosing proper Op -class OpConditionContext { - public: - typedef std::unordered_map> TensorShapeMap; - OpConditionContext(const Workspace *ws, TensorShapeMap *info); - ~OpConditionContext() = default; - - void set_operator_def(const OperatorDef *operator_def); - - inline const OperatorDef *operator_def() const { - return operator_def_; - } - - inline const Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } - - inline TensorShapeMap *tensor_shape_info() const { - return tensor_shape_info_; - } - - void set_output_mem_type(MemoryType type); - - inline MemoryType output_mem_type() const { - return output_mem_type_; - } - - void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt); - - MemoryType GetInputMemType(size_t idx) const; - - DataType GetInputDataType(size_t idx) const; - -#ifdef MACE_ENABLE_OPENCL - void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type); - OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const; -#endif // MACE_ENABLE_OPENCL - - private: - const OperatorDef *operator_def_; - const Workspace *ws_; - Device *device_; - TensorShapeMap *tensor_shape_info_; - // used for memory transform - std::vector input_mem_types_; - std::vector input_data_types_; - MemoryType output_mem_type_; // there is only one output memory type now. 
-#ifdef MACE_ENABLE_OPENCL - std::vector input_opencl_buffer_types_; -#endif // MACE_ENABLE_OPENCL -}; - -// memory_optimizer, device -class OpConstructContext { - typedef std::unordered_map> TensorShapeMap; - - public: - explicit OpConstructContext(Workspace *ws); - ~OpConstructContext() = default; - - void set_operator_def(std::shared_ptr operator_def); - - inline std::shared_ptr operator_def() const { - return operator_def_; - } - - inline Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } -#ifdef MACE_ENABLE_OPENCL - inline MemoryType GetOpMemoryType() const { - return static_cast( - ProtoArgHelper::GetOptionalArg( - *operator_def_, OutputMemoryTypeTagName(), - static_cast(MemoryType::CPU_BUFFER))); - } -#endif // MACE_ENABLE_OPENCL - - private: - std::shared_ptr operator_def_; - Workspace *ws_; - Device *device_; -}; - -// memory_optimizer, device -class OpInitContext { - public: - explicit OpInitContext(Workspace *ws, Device *device = nullptr); - ~OpInitContext() = default; - - inline Workspace *workspace() const { - return ws_; - } - - inline void set_device(Device *device) { - device_ = device; - } - - inline Device *device() const { - return device_; - } - - private: - Workspace *ws_; - Device *device_; -}; - -// Conventions -// * If there exist format, NHWC is the default format -// * The input/output format of CPU ops with float data type is NCHW -// * The input/output format of GPU ops and CPU Quantization ops is NHWC -// * Inputs' data type is same as the operation data type by default. -// * The outputs' data type is same as the operation data type by default. -class Operation { - public: - explicit Operation(OpConstructContext *context); - virtual ~Operation() = default; - - template - inline T GetOptionalArg(const std::string &name, - const T &default_value) const { - MACE_CHECK(operator_def_, "operator_def was null!"); - return ProtoArgHelper::GetOptionalArg( - *operator_def_, name, default_value); - } - template - inline std::vector GetRepeatedArgs( - const std::string &name, const std::vector &default_value = {}) const { - MACE_CHECK(operator_def_, "operator_def was null!"); - return ProtoArgHelper::GetRepeatedArgs( - *operator_def_, name, default_value); - } - - inline DeviceType device_type() const { - return static_cast(operator_def_->device_type()); - } - - inline const Tensor *Input(unsigned int idx) { - MACE_CHECK(idx < inputs_.size()); - return inputs_[idx]; - } - - inline Tensor *Output(int idx) { return outputs_[idx]; } - - inline int InputSize() { return inputs_.size(); } - inline int OutputSize() { return outputs_.size(); } - inline const std::vector &Inputs() const { return inputs_; } - inline const std::vector &Outputs() { return outputs_; } - - // Run Op asynchronously (depends on device), return a future if not nullptr. 
- virtual MaceStatus Init(OpInitContext *); - virtual MaceStatus Run(OpContext *) = 0; - - inline const OperatorDef &debug_def() const { - MACE_CHECK(has_debug_def(), "operator_def was null!"); - return *operator_def_; - } - - inline void set_debug_def( - const std::shared_ptr &operator_def) { - operator_def_ = operator_def; - } - - inline bool has_debug_def() const { return operator_def_ != nullptr; } - - inline std::shared_ptr operator_def() { - return operator_def_; - } - - protected: - std::shared_ptr operator_def_; - std::vector inputs_; - std::vector outputs_; - - MACE_DISABLE_COPY_AND_ASSIGN(Operation); -}; - -// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the -// indices of the operator's inputs and outputs, in order to avoid confusion. -// For example, for a fully convolution layer that has input, weight and bias, -// you can define its input tags as: -// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS); -// And in the code, instead of doing -// auto& weight = Input(1); -// you can now do -// auto& weight = Input(WEIGHT); -// to make it more clear. -#define MACE_OP_INPUT_TAGS(first_input, ...) \ - enum _InputTags { first_input = 0, __VA_ARGS__ } -#define MACE_OP_OUTPUT_TAGS(first_input, ...) \ - enum _OutputTags { first_input = 0, __VA_ARGS__ } - -struct OpRegistrationInfo { - public: - typedef std::function(OpConstructContext *)> - OpCreator; - typedef std::function(OpConditionContext *)> - DevicePlacer; - typedef std::function MemoryTypeSetter; - typedef std::function(OpConditionContext *)> - DataFormatSelector; - - OpRegistrationInfo(); - - void AddDevice(DeviceType); - - void Register(const std::string &key, OpCreator creator); - - std::set devices; - std::unordered_map creators; - DevicePlacer device_placer; - MemoryTypeSetter memory_type_setter; - DataFormatSelector data_format_selector; -}; - -class OpConditionBuilder { - public: - explicit OpConditionBuilder(const std::string &type); - - const std::string type() const; - - OpConditionBuilder &SetDevicePlacerFunc( - OpRegistrationInfo::DevicePlacer placer); - - // If you set input memory type for specified Op, - // you must call OpConditionContext::set_output_mem_type - OpConditionBuilder &SetInputMemoryTypeSetter( - OpRegistrationInfo::MemoryTypeSetter setter); - - OpConditionBuilder &SetInputsDataFormatSelector( - OpRegistrationInfo::DataFormatSelector selector); - - void Finalize(OpRegistrationInfo *info) const; - - private: - std::string type_; - OpRegistrationInfo::DevicePlacer placer_; - OpRegistrationInfo::MemoryTypeSetter memory_type_setter_; - OpRegistrationInfo::DataFormatSelector data_format_selector_; -}; - -class OpRegistryBase { - public: - OpRegistryBase() = default; - virtual ~OpRegistryBase() = default; - MaceStatus Register(const std::string &op_type, - const DeviceType device_type, - const DataType dt, - OpRegistrationInfo::OpCreator creator); - - MaceStatus Register(const OpConditionBuilder &builder); - - const std::set AvailableDevices( - const std::string &op_type, OpConditionContext *context) const; - - void GetInOutMemoryTypes( - const std::string &op_type, OpConditionContext *context) const; - - const std::vector InputsDataFormat( - const std::string &op_type, OpConditionContext *context) const; - - std::unique_ptr CreateOperation( - OpConstructContext *context, - DeviceType device_type) const; - - template - static std::unique_ptr DefaultCreator( - OpConstructContext *context) { - return std::unique_ptr(new DerivedType(context)); - } - - private: - std::unordered_map< - 
std::string, - std::unique_ptr> registry_; - MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase); -}; - -#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ - op_registry->Register(op_type, \ - device, \ - DataTypeToEnum
::value, \ - OpRegistryBase::DefaultCreator>) - -#define MACE_REGISTER_OP_BY_CLASS( \ - op_registry, op_type, class_name, device, dt) \ - op_registry->Register(op_type, \ - device, \ - DataTypeToEnum
::value, \ - OpRegistryBase::DefaultCreator) - -#ifdef MACE_ENABLE_OPENCL -#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \ - op_registry->Register( \ - op_type, \ - DeviceType::GPU, \ - DT_FLOAT, \ - OpRegistryBase::DefaultCreator>) -#else -#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) -#endif - -#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \ - op_registry->Register(builder) - -} // namespace mace - -#endif // MACE_CORE_OPERATOR_H_ diff --git a/mace/core/ops/op_condition_builder.cc b/mace/core/ops/op_condition_builder.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f226e3620f9b9988e83865a2f6f73aa06daef77 --- /dev/null +++ b/mace/core/ops/op_condition_builder.cc @@ -0,0 +1,59 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_condition_builder.h" + +namespace mace { + +OpConditionBuilder::OpConditionBuilder(const std::string &type) + : type_(type) {} + +const std::string OpConditionBuilder::type() const { + return type_; +} + +OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc( + OpRegistrationInfo::DevicePlacer placer) { + placer_ = placer; + return *this; +} + +OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter( + OpRegistrationInfo::MemoryTypeSetter setter) { + memory_type_setter_ = setter; + return *this; +} + +OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector( + OpRegistrationInfo::DataFormatSelector selector) { + data_format_selector_ = selector; + return *this; +} + +void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const { + if (info != nullptr) { + if (placer_) { + info->device_placer = placer_; + } + if (memory_type_setter_) { + info->memory_type_setter = memory_type_setter_; + } + + if (data_format_selector_) { + info->data_format_selector = data_format_selector_; + } + } +} + +} // namespace mace diff --git a/mace/core/ops/op_condition_builder.h b/mace/core/ops/op_condition_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..1378e00ec69f56cd32b1b4645346f2a510f104bc --- /dev/null +++ b/mace/core/ops/op_condition_builder.h @@ -0,0 +1,53 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ +#define MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ + +#include +#include + +#include "mace/core/registry/op_registration_info.h" +#include "mace/core/types.h" + +namespace mace { +class OpConditionBuilder { + public: + explicit OpConditionBuilder(const std::string &type); + + const std::string type() const; + + OpConditionBuilder &SetDevicePlacerFunc( + OpRegistrationInfo::DevicePlacer placer); + + // If you set input memory type for specified Op, + // you must call OpConditionContext::set_output_mem_type + OpConditionBuilder &SetInputMemoryTypeSetter( + OpRegistrationInfo::MemoryTypeSetter setter); + + OpConditionBuilder &SetInputsDataFormatSelector( + OpRegistrationInfo::DataFormatSelector selector); + + void Finalize(OpRegistrationInfo *info) const; + + private: + std::string type_; + OpRegistrationInfo::DevicePlacer placer_; + OpRegistrationInfo::MemoryTypeSetter memory_type_setter_; + OpRegistrationInfo::DataFormatSelector data_format_selector_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONDITION_BUILDER_H_ diff --git a/mace/core/ops/op_condition_context.cc b/mace/core/ops/op_condition_context.cc new file mode 100644 index 0000000000000000000000000000000000000000..eb094a8cf2889a1e926363b2a3de24884cad7a98 --- /dev/null +++ b/mace/core/ops/op_condition_context.cc @@ -0,0 +1,104 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_condition_context.h" + +#include "mace/core/arg_helper.h" +#include "mace/proto/mace.pb.h" +#include "mace/utils/logging.h" + +namespace mace { + +OpConditionContext::OpConditionContext( + const Workspace *ws, + OpConditionContext::TensorShapeMap *info) + : operator_def_(nullptr), + ws_(ws), + device_(nullptr), + tensor_shape_info_(info) {} + +void OpConditionContext::set_operator_def( + const OperatorDef *operator_def) { + operator_def_ = operator_def; + input_data_types_.clear(); +} + +void OpConditionContext::SetInputInfo(size_t idx, + MemoryType mem_type, + DataType dt) { + if (input_mem_types_.empty()) { + // the default inputs' memory types are same as output memory type. + input_mem_types_.resize(operator_def_->input_size(), output_mem_type_); + } + if (input_data_types_.empty()) { + // the default inputs' data types are same as operation's data type. 
+ DataType op_dt = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DataType::DT_FLOAT))); + input_data_types_.resize(operator_def_->input_size(), op_dt); + } + MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size()); + input_mem_types_[idx] = mem_type; + input_data_types_[idx] = dt; +} + +void OpConditionContext::set_output_mem_type(MemoryType type) { + MACE_CHECK(operator_def_ != nullptr); + output_mem_type_ = type; + input_mem_types_.clear(); +} + +MemoryType OpConditionContext::GetInputMemType(size_t idx) const { + if (input_mem_types_.empty()) { + return output_mem_type_; + } + MACE_CHECK(idx < input_mem_types_.size(), + idx, " < ", input_mem_types_.size()); + return input_mem_types_[idx]; +} + +DataType OpConditionContext::GetInputDataType(size_t idx) const { + if (input_data_types_.empty()) { + // the default inputs' data types are same as operation's data type. + return static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DataType::DT_FLOAT))); + } + MACE_CHECK(idx < input_data_types_.size()); + return input_data_types_[idx]; +} + +#ifdef MACE_ENABLE_OPENCL +void OpConditionContext::SetInputOpenCLBufferType( + size_t idx, OpenCLBufferType buffer_type) { + if (input_opencl_buffer_types_.empty()) { + // the default inputs' memory types are same as output memory type. + input_opencl_buffer_types_.resize(operator_def_->input_size(), + OpenCLBufferType::IN_OUT_CHANNEL); + } + MACE_CHECK(idx < input_opencl_buffer_types_.size()); + input_opencl_buffer_types_[idx] = buffer_type; +} + +OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType( + size_t idx) const { + if (input_opencl_buffer_types_.empty()) { + return OpenCLBufferType::IN_OUT_CHANNEL; + } + MACE_CHECK(idx < input_opencl_buffer_types_.size()); + return input_opencl_buffer_types_[idx]; +} +#endif // MACE_ENABLE_OPENCL + +} // namespace mace diff --git a/mace/core/ops/op_condition_context.h b/mace/core/ops/op_condition_context.h new file mode 100644 index 0000000000000000000000000000000000000000..8e1c882e2cc7f94bab2d4266e5365b99b916aa19 --- /dev/null +++ b/mace/core/ops/op_condition_context.h @@ -0,0 +1,94 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ + +#include +#include +#include +#include + +#include "mace/core/types.h" + +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_util.h" +#endif // MACE_ENABLE_OPENCL + +namespace mace { +class Workspace; +class Device; + +// OpConditionContext has all information used for choosing proper Op +class OpConditionContext { + public: + typedef std::unordered_map> TensorShapeMap; + OpConditionContext(const Workspace *ws, TensorShapeMap *info); + ~OpConditionContext() = default; + + void set_operator_def(const OperatorDef *operator_def); + + const OperatorDef *operator_def() const { + return operator_def_; + } + + const Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } + + TensorShapeMap *tensor_shape_info() const { + return tensor_shape_info_; + } + + void set_output_mem_type(MemoryType type); + + MemoryType output_mem_type() const { + return output_mem_type_; + } + + void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt); + + MemoryType GetInputMemType(size_t idx) const; + + DataType GetInputDataType(size_t idx) const; + +#ifdef MACE_ENABLE_OPENCL + void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type); + OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const; +#endif // MACE_ENABLE_OPENCL + + private: + const OperatorDef *operator_def_; + const Workspace *ws_; + Device *device_; + TensorShapeMap *tensor_shape_info_; + // used for memory transform + std::vector input_mem_types_; + std::vector input_data_types_; + MemoryType output_mem_type_; // there is only one output memory type now. +#ifdef MACE_ENABLE_OPENCL + std::vector input_opencl_buffer_types_; +#endif // MACE_ENABLE_OPENCL +}; +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_ diff --git a/mace/core/ops/op_construct_context.cc b/mace/core/ops/op_construct_context.cc new file mode 100644 index 0000000000000000000000000000000000000000..fc701259bf7f397dc5b85e4fba36d54f0a2a1036 --- /dev/null +++ b/mace/core/ops/op_construct_context.cc @@ -0,0 +1,29 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_construct_context.h" + +namespace mace { + +OpConstructContext::OpConstructContext(Workspace *ws) + : operator_def_(nullptr), + ws_(ws), + device_(nullptr) {} + +void OpConstructContext::set_operator_def( + std::shared_ptr operator_def) { + operator_def_ = operator_def; +} + +} // namespace mace diff --git a/mace/core/ops/op_construct_context.h b/mace/core/ops/op_construct_context.h new file mode 100644 index 0000000000000000000000000000000000000000..9bd4709da3359a57f24155d4c394e15ae8951d0e --- /dev/null +++ b/mace/core/ops/op_construct_context.h @@ -0,0 +1,73 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ + +#include +#include +#include +#include + +#include "mace/core/arg_helper.h" +#include "mace/core/types.h" +#include "mace/proto/mace.pb.h" + +namespace mace { +class Device; +class Workspace; + +// memory_optimizer, device +class OpConstructContext { + typedef std::unordered_map> TensorShapeMap; + + public: + explicit OpConstructContext(Workspace *ws); + ~OpConstructContext() = default; + + void set_operator_def(std::shared_ptr operator_def); + + std::shared_ptr operator_def() const { + return operator_def_; + } + + Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } +#ifdef MACE_ENABLE_OPENCL + inline MemoryType GetOpMemoryType() const { + return static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, OutputMemoryTypeTagName(), + static_cast(MemoryType::CPU_BUFFER))); + } +#endif // MACE_ENABLE_OPENCL + + private: + std::shared_ptr operator_def_; + Workspace *ws_; + Device *device_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_ diff --git a/mace/core/op_context.cc b/mace/core/ops/op_context.cc similarity index 96% rename from mace/core/op_context.cc rename to mace/core/ops/op_context.cc index d0ebeff7a733ed95f0d47275427bb130cb8bc446..641609952cb250fd6827c1e762c1fac75af85dda 100644 --- a/mace/core/op_context.cc +++ b/mace/core/ops/op_context.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" namespace mace { diff --git a/mace/core/op_context.h b/mace/core/ops/op_context.h similarity index 90% rename from mace/core/op_context.h rename to mace/core/ops/op_context.h index 26a31dc3c1d5df9f3665114d463890b029b598d1..062254793f30d9ac2d4db6bede9a1d103eafd6b8 100644 --- a/mace/core/op_context.h +++ b/mace/core/ops/op_context.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_CORE_OP_CONTEXT_H_ -#define MACE_CORE_OP_CONTEXT_H_ +#ifndef MACE_CORE_OPS_OP_CONTEXT_H_ +#define MACE_CORE_OPS_OP_CONTEXT_H_ #include "mace/core/device.h" #include "mace/core/workspace.h" @@ -35,8 +35,7 @@ class OpContext { Device *device_; Workspace *ws_; StatsFuture *future_; - // metadata }; } // namespace mace -#endif // MACE_CORE_OP_CONTEXT_H_ +#endif // MACE_CORE_OPS_OP_CONTEXT_H_ diff --git a/mace/core/ops/op_delegator.h b/mace/core/ops/op_delegator.h new file mode 100644 index 0000000000000000000000000000000000000000..029bd39f814e8b69507a0a2db162732885fb2acd --- /dev/null +++ b/mace/core/ops/op_delegator.h @@ -0,0 +1,58 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OP_DELEGATOR_H_ +#define MACE_CORE_OPS_OP_DELEGATOR_H_ + +#include + +#include "mace/utils/macros.h" +#include "mace/utils/memory.h" + +namespace mace { + +enum ImplType { + REF = 0, + NEON, +}; + +#ifdef MACE_ENABLE_NEON +#define MACE_CPU_IMPL_TYPE NEON +#else +#define MACE_CPU_IMPL_TYPE REF +#endif + +struct DelegatorParam { + public: + DelegatorParam() = default; + virtual ~DelegatorParam() = default; +}; + +class OpDelegator { + public: + explicit OpDelegator(const DelegatorParam ¶m) { + MACE_UNUSED(param); + } + virtual ~OpDelegator() = default; + + template + static std::unique_ptr DefaultCreator( + const DelegatorParam ¶m) { + return make_unique(static_cast(param)); + } +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_DELEGATOR_H_ diff --git a/mace/core/ops/op_init_context.cc b/mace/core/ops/op_init_context.cc new file mode 100644 index 0000000000000000000000000000000000000000..2b3cee2505da7e2b66279b1d45c2b108d611ce5b --- /dev/null +++ b/mace/core/ops/op_init_context.cc @@ -0,0 +1,22 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/op_init_context.h" + +namespace mace { + +OpInitContext::OpInitContext(Workspace *ws, Device *device) + : ws_(ws), device_(device) {} + +} // namespace mace diff --git a/mace/core/ops/op_init_context.h b/mace/core/ops/op_init_context.h new file mode 100644 index 0000000000000000000000000000000000000000..da51cc23536b016f8ba9f256adc6438c1fa0c100 --- /dev/null +++ b/mace/core/ops/op_init_context.h @@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_OPS_OP_INIT_CONTEXT_H_ +#define MACE_CORE_OPS_OP_INIT_CONTEXT_H_ + +namespace mace { +class Workspace; +class Device; + +// memory_optimizer, device +class OpInitContext { + public: + explicit OpInitContext(Workspace *ws, Device *device = nullptr); + ~OpInitContext() = default; + + Workspace *workspace() const { + return ws_; + } + + void set_device(Device *device) { + device_ = device; + } + + Device *device() const { + return device_; + } + + private: + Workspace *ws_; + Device *device_; +}; + +} // namespace mace + +#endif // MACE_CORE_OPS_OP_INIT_CONTEXT_H_ diff --git a/mace/core/ops/operator.cc b/mace/core/ops/operator.cc new file mode 100644 index 0000000000000000000000000000000000000000..5aa102d91717723e3db1492a3a6f195349961e03 --- /dev/null +++ b/mace/core/ops/operator.cc @@ -0,0 +1,68 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/ops/operator.h" + +#include + +#include "mace/core/ops/op_construct_context.h" +#include "mace/core/ops/op_init_context.h" + +namespace mace { +Operation::Operation(OpConstructContext *context) + : operator_def_(context->operator_def()) {} + +MaceStatus Operation::Init(OpInitContext *context) { + Workspace *ws = context->workspace(); + for (const std::string &input_str : operator_def_->input()) { + const Tensor *tensor = ws->GetTensor(input_str); + MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(), + ": Encountered a non-existing input tensor: ", input_str); + inputs_.push_back(tensor); + } + for (int i = 0; i < operator_def_->output_size(); ++i) { + const std::string output_str = operator_def_->output(i); + if (ws->HasTensor(output_str)) { + outputs_.push_back(ws->GetTensor(output_str)); + } else { + MACE_CHECK( + operator_def_->output_type_size() == 0 || + operator_def_->output_size() == operator_def_->output_type_size(), + "operator output size != operator output type size", + operator_def_->output_size(), + operator_def_->output_type_size()); + DataType output_type; + if (i < operator_def_->output_type_size()) { + output_type = operator_def_->output_type(i); + } else { + output_type = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def_, "T", static_cast(DT_FLOAT))); + } + outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( + output_str, context->device()->allocator(), output_type))); + } + if (i < operator_def_->output_shape_size()) { + std::vector + shape_configured(operator_def_->output_shape(i).dims_size()); + for (size_t dim = 0; dim < shape_configured.size(); ++dim) { + shape_configured[dim] = operator_def_->output_shape(i).dims(dim); + } + ws->GetTensor(output_str)->SetShapeConfigured(shape_configured); + } + } + return MaceStatus::MACE_SUCCESS; +} + +} // namespace mace diff --git a/mace/core/ops/operator.h b/mace/core/ops/operator.h new file mode 100644 index 0000000000000000000000000000000000000000..bb4a20d554fa7159cec1f022252cf9f6870f5fa0 --- /dev/null +++ b/mace/core/ops/operator.h @@ -0,0 +1,120 @@ +// 
Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_OPS_OPERATOR_H_ +#define MACE_CORE_OPS_OPERATOR_H_ + +#include +#include +#include + +#include "mace/core/arg_helper.h" +#include "mace/core/ops/op_construct_context.h" +#include "mace/core/ops/op_context.h" +#include "mace/core/tensor.h" +#include "mace/proto/mace.pb.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_util.h" +#endif // MACE_ENABLE_OPENCL + +namespace mace { +class OpInitContext; +// Conventions +// * If there exist format, NHWC is the default format +// * The input/output format of CPU ops with float data type is NCHW +// * The input/output format of GPU ops and CPU Quantization ops is NHWC +// * Inputs' data type is same as the operation data type by default. +// * The outputs' data type is same as the operation data type by default. +class Operation { + public: + explicit Operation(OpConstructContext *context); + virtual ~Operation() = default; + + template + T GetOptionalArg(const std::string &name, + const T &default_value) const { + MACE_CHECK(operator_def_, "operator_def was null!"); + return ProtoArgHelper::GetOptionalArg( + *operator_def_, name, default_value); + } + template + std::vector GetRepeatedArgs( + const std::string &name, const std::vector &default_value = {}) const { + MACE_CHECK(operator_def_, "operator_def was null!"); + return ProtoArgHelper::GetRepeatedArgs( + *operator_def_, name, default_value); + } + + DeviceType device_type() const { + return static_cast(operator_def_->device_type()); + } + + const Tensor *Input(unsigned int idx) { + MACE_CHECK(idx < inputs_.size()); + return inputs_[idx]; + } + + Tensor *Output(int idx) { return outputs_[idx]; } + + int InputSize() { return inputs_.size(); } + int OutputSize() { return outputs_.size(); } + const std::vector &Inputs() const { return inputs_; } + const std::vector &Outputs() { return outputs_; } + + // Run Op asynchronously (depends on device), return a future if not nullptr. + virtual MaceStatus Init(OpInitContext *); + virtual MaceStatus Run(OpContext *) = 0; + + const OperatorDef &debug_def() const { + MACE_CHECK(has_debug_def(), "operator_def was null!"); + return *operator_def_; + } + + void set_debug_def( + const std::shared_ptr &operator_def) { + operator_def_ = operator_def; + } + + bool has_debug_def() const { return operator_def_ != nullptr; } + + inline std::shared_ptr operator_def() { + return operator_def_; + } + + protected: + std::shared_ptr operator_def_; + std::vector inputs_; + std::vector outputs_; + + MACE_DISABLE_COPY_AND_ASSIGN(Operation); +}; + +// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the +// indices of the operator's inputs and outputs, in order to avoid confusion. 
+// For example, for a fully convolution layer that has input, weight and bias, +// you can define its input tags as: +// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS); +// And in the code, instead of doing +// auto& weight = Input(1); +// you can now do +// auto& weight = Input(WEIGHT); +// to make it more clear. +#define MACE_OP_INPUT_TAGS(first_input, ...) \ + enum _InputTags { first_input = 0, __VA_ARGS__ } +#define MACE_OP_OUTPUT_TAGS(first_input, ...) \ + enum _OutputTags { first_input = 0, __VA_ARGS__ } + +} // namespace mace + +#endif // MACE_CORE_OPS_OPERATOR_H_ diff --git a/mace/core/registry/op_delegator_registry.cc b/mace/core/registry/op_delegator_registry.cc new file mode 100644 index 0000000000000000000000000000000000000000..006f5555f8710ddd667166c182088b86de6e2af5 --- /dev/null +++ b/mace/core/registry/op_delegator_registry.cc @@ -0,0 +1,39 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/registry/op_delegator_registry.h" + +#include + +#include "mace/utils/logging.h" + +namespace mace { + +MaceStatus OpDelegatorRegistry::Register(const std::string &key, + DelegatorCreator creator) { + MACE_CHECK(registry_.count(key) == 0, "Register an exist key."); + registry_[key] = std::move(creator); + return MaceStatus::MACE_SUCCESS; +} + +DelegatorCreator OpDelegatorRegistry::GetCreator(const std::string &key) const { + MACE_CHECK(registry_.count(key) > 0, key, " not exist."); + return registry_.at(key); +} + +template<> const char *DType::name_ = "float"; +template<> const char *DType::name_ = "int"; +template<> const char *DType::name_ = "uint8_t"; + +} // namespace mace diff --git a/mace/core/registry/op_delegator_registry.h b/mace/core/registry/op_delegator_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..f70d5555792b19419d48c84fd06ad9f422096d95 --- /dev/null +++ b/mace/core/registry/op_delegator_registry.h @@ -0,0 +1,94 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
+#define MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "mace/core/ops/op_delegator.h"
+#include "mace/proto/mace.pb.h"
+#include "mace/public/mace.h"
+
+namespace mace {
+typedef std::function<std::unique_ptr<OpDelegator>(const DelegatorParam &)>
+    DelegatorCreator;
+
+class OpDelegatorRegistry {
+ public:
+  OpDelegatorRegistry() = default;
+  ~OpDelegatorRegistry() = default;
+
+  MaceStatus Register(const std::string &key, DelegatorCreator creator);
+  DelegatorCreator GetCreator(const std::string &key) const;
+
+ private:
+  std::unordered_map<std::string, DelegatorCreator> registry_;
+};
+
+template<typename T>
+struct DType { static const char *name_; };
+template<> const char *DType<float>::name_;
+template<> const char *DType<int>::name_;
+template<> const char *DType<uint8_t>::name_;
+
+
+}  // namespace mace
+
+#ifndef MACE_DELEGATOR_KEY_TMP
+#define MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl) \
+  (std::string(#delegator_name"_"#device"_"#impl"_") + DType<DT>::name_)
+#endif  // MACE_DELEGATOR_KEY_TMP
+
+#ifndef MACE_DELEGATOR_KEY
+#define MACE_DELEGATOR_KEY(delegator_name, device, DT, impl) \
+  MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl)
+#endif  // MACE_DELEGATOR_KEY
+
+#ifndef MACE_DELEGATOR_KEY_EX_TMP
+#define MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag) \
+  (std::string(#delegator_name"_"#device"_"#impl"_"#tag"_") + DType<DT>::name_)
+#endif  // MACE_DELEGATOR_KEY_EX_TMP
+
+#ifndef MACE_DELEGATOR_KEY_EX
+#define MACE_DELEGATOR_KEY_EX(delegator_name, device, DT, impl, tag) \
+  MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag)
+#endif  // MACE_DELEGATOR_KEY_EX
+
+#ifndef MACE_REGISTER_DELEGATOR
+#define MACE_REGISTER_DELEGATOR(registry, class_name, param_name, key) \
+  void Register##class_name##Delegator(OpDelegatorRegistry *registry) { \
+    registry->Register( \
+        key, OpDelegator::DefaultCreator<class_name, param_name>); \
+  }
+#endif  // MACE_REGISTER_DELEGATOR
+
+#ifndef MACE_DEFINE_DELEGATOR_CREATOR
+#define MACE_DEFINE_DELEGATOR_CREATOR(class_name) \
+  static std::unique_ptr<class_name> Create( \
+      Workspace *workspace, const std::string &tag, \
+      const DelegatorParam &param) { \
+    DelegatorCreator creator = \
+        workspace->GetDelegatorRegistry()->GetCreator(tag); \
+    std::unique_ptr<OpDelegator> delegator = creator(param); \
+    return std::unique_ptr<class_name>( \
+        static_cast<class_name *>(delegator.release())); \
+  }
+#endif  // MACE_DEFINE_DELEGATOR_CREATOR
+
+#endif  // MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
diff --git a/mace/core/registry/op_registration_info.cc b/mace/core/registry/op_registration_info.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e043897648ad644f56ad0b402698783862535630
--- /dev/null
+++ b/mace/core/registry/op_registration_info.cc
@@ -0,0 +1,69 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "mace/core/registry/op_registration_info.h" + +#include +#include +#include +#include + +#include "mace/core/ops/op_condition_context.h" + +namespace mace { +OpRegistrationInfo::OpRegistrationInfo() { + // default device type placer + device_placer = [this](OpConditionContext *context) -> std::set { + MACE_UNUSED(context); + return this->devices; + }; + + // default input and output memory type setter + memory_type_setter = [](OpConditionContext *context) -> void { + if (context->device()->device_type() == DeviceType::GPU) { +#ifdef MACE_ENABLE_OPENCL + if (context->device()->gpu_runtime()->UseImageMemory()) { + context->set_output_mem_type(MemoryType::GPU_IMAGE); + } else { + context->set_output_mem_type(MemoryType::GPU_BUFFER); + } +#endif // MACE_ENABLE_OPENCL + } else { + context->set_output_mem_type(MemoryType::CPU_BUFFER); + } + }; + + data_format_selector = [](OpConditionContext *context) + -> std::vector { + DataFormat op_data_format = + static_cast( + ProtoArgHelper::GetOptionalArg( + *context->operator_def(), "data_format", + static_cast(DataFormat::NONE))); + return std::vector(context->operator_def()->input_size(), + op_data_format); + }; +} + +void OpRegistrationInfo::AddDevice(DeviceType device) { + devices.insert(device); +} + +void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) { + VLOG(3) << "Registering: " << key; + MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key); + creators[key] = std::move(creator); +} + +} // namespace mace diff --git a/mace/core/registry/op_registration_info.h b/mace/core/registry/op_registration_info.h new file mode 100644 index 0000000000000000000000000000000000000000..ed110a3c07bfe040bb3ea53f8e99d92523326513 --- /dev/null +++ b/mace/core/registry/op_registration_info.h @@ -0,0 +1,56 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+
+#ifndef MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
+#define MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "mace/core/ops/operator.h"
+#include "mace/proto/mace.pb.h"
+
+namespace mace {
+class OpConstructContext;
+class OpConditionContext;
+
+class OpRegistrationInfo {
+ public:
+  typedef std::function<std::unique_ptr<Operation>(OpConstructContext *)>
+      OpCreator;
+  typedef std::function<std::set<DeviceType>(OpConditionContext *)>
+      DevicePlacer;
+  typedef std::function<void(OpConditionContext *)> MemoryTypeSetter;
+  typedef std::function<std::vector<DataFormat>(OpConditionContext *)>
+      DataFormatSelector;
+
+  OpRegistrationInfo();
+
+  void AddDevice(DeviceType);
+
+  void Register(const std::string &key, OpCreator creator);
+
+  std::set<DeviceType> devices;
+  std::unordered_map<std::string, OpCreator> creators;
+  DevicePlacer device_placer;
+  MemoryTypeSetter memory_type_setter;
+  DataFormatSelector data_format_selector;
+};
+}  // namespace mace
+
+#endif  // MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
diff --git a/mace/core/registry/ops_registry.cc b/mace/core/registry/ops_registry.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8a99c9e1d9a4ce00d94254a92b01c9384e25271c
--- /dev/null
+++ b/mace/core/registry/ops_registry.cc
@@ -0,0 +1,149 @@
+// Copyright 2020 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "mace/core/registry/ops_registry.h" + +#include +#include +#include +#include +#include + +namespace mace { +namespace { +class OpKeyBuilder { + public: + explicit OpKeyBuilder(const std::string &op_name); + + OpKeyBuilder &Device(DeviceType device); + + OpKeyBuilder &TypeConstraint(const char *attr_name, + DataType allowed); + + const std::string Build(); + + private: + std::string op_name_; + DeviceType device_type_; + std::map type_constraint_; +}; + +OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {} + +OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { + device_type_ = device; + return *this; +} + +OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, + DataType allowed) { + type_constraint_[attr_name] = allowed; + return *this; +} + +const std::string OpKeyBuilder::Build() { + static const std::vector type_order = {"T"}; + std::stringstream ss; + ss << op_name_; + ss << device_type_; + for (auto type : type_order) { + ss << type << "_" << DataTypeToString(type_constraint_[type]); + } + + return ss.str(); +} +} // namespace + +MaceStatus OpRegistry::Register( + const std::string &op_type, + const DeviceType device_type, + const DataType dt, + OpRegistrationInfo::OpCreator creator) { + if (registry_.count(op_type) == 0) { + registry_[op_type] = std::unique_ptr( + new OpRegistrationInfo); + } + registry_[op_type]->AddDevice(device_type); + + std::string op_key = OpKeyBuilder(op_type) + .Device(device_type) + .TypeConstraint("T", dt) + .Build(); + registry_.at(op_type)->Register(op_key, creator); + return MaceStatus::MACE_SUCCESS; +} + +MaceStatus OpRegistry::Register( + const OpConditionBuilder &builder) { + std::string op_type = builder.type(); + if (registry_.count(op_type) == 0) { + registry_[op_type] = std::unique_ptr( + new OpRegistrationInfo); + } + builder.Finalize(registry_[op_type].get()); + return MaceStatus::MACE_SUCCESS; +} + +const std::set OpRegistry::AvailableDevices( + const std::string &op_type, OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + return registry_.at(op_type)->device_placer(context); +} + +void OpRegistry::GetInOutMemoryTypes( + const std::string &op_type, + OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered. op_type=", op_type); + return registry_.at(op_type)->memory_type_setter(context); +} + +const std::vector OpRegistry::InputsDataFormat( + const std::string &op_type, + OpConditionContext *context) const { + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + return registry_.at(op_type)->data_format_selector(context); +} + +std::unique_ptr OpRegistry::CreateOperation( + OpConstructContext *context, + DeviceType device_type) const { + auto operator_def = context->operator_def(); + DataType dtype = static_cast( + ProtoArgHelper::GetOptionalArg( + *operator_def, "T", static_cast(DT_FLOAT))); + VLOG(1) << "Creating operator " << operator_def->name() << "(" + << operator_def->type() << "<" << dtype << ">" << ") on " + << device_type; + const std::string op_type = context->operator_def()->type(); + MACE_CHECK(registry_.count(op_type) != 0, + op_type, " operation is not registered."); + + auto key_dtype = + (device_type == DeviceType::GPU && dtype == DT_HALF) ? 
DT_FLOAT : dtype; + std::string key = OpKeyBuilder(op_type) + .Device(device_type) + .TypeConstraint("T", key_dtype) + .Build(); + if (registry_.at(op_type)->creators.count(key) == 0) { + LOG(FATAL) << "Key not registered: " << key + << ", op type is: " << operator_def->type(); + } + return registry_.at(op_type)->creators.at(key)(context); +} + +} // namespace mace diff --git a/mace/core/registry/ops_registry.h b/mace/core/registry/ops_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..46476a64d157e6446b5668279e7adedd2df4eec5 --- /dev/null +++ b/mace/core/registry/ops_registry.h @@ -0,0 +1,99 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MACE_CORE_REGISTRY_OPS_REGISTRY_H_ +#define MACE_CORE_REGISTRY_OPS_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "mace/core/ops/operator.h" +#include "mace/core/ops/op_condition_builder.h" +#include "mace/core/ops/op_condition_context.h" +#include "mace/public/mace.h" +#include "mace/proto/mace.pb.h" +#include "mace/utils/memory.h" + +namespace mace { + +class OpRegistry { + public: + OpRegistry() = default; + virtual ~OpRegistry() = default; + MaceStatus Register(const std::string &op_type, + const DeviceType device_type, + const DataType dt, + OpRegistrationInfo::OpCreator creator); + + MaceStatus Register(const OpConditionBuilder &builder); + + const std::set AvailableDevices( + const std::string &op_type, OpConditionContext *context) const; + + void GetInOutMemoryTypes( + const std::string &op_type, OpConditionContext *context) const; + + const std::vector InputsDataFormat( + const std::string &op_type, OpConditionContext *context) const; + + std::unique_ptr CreateOperation( + OpConstructContext *context, + DeviceType device_type) const; + + template + static std::unique_ptr DefaultCreator( + OpConstructContext *context) { + return make_unique(context); + } + + private: + std::unordered_map> + registry_; + MACE_DISABLE_COPY_AND_ASSIGN(OpRegistry); +}; + +#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ + op_registry->Register(op_type, \ + device, \ + DataTypeToEnum
::value, \ + OpRegistry::DefaultCreator>) + +#define MACE_REGISTER_OP_BY_CLASS(\ + op_registry, op_type, class_name, device, dt) \ + op_registry->Register(op_type, \ + device, \ + DataTypeToEnum
::value, \ + OpRegistry::DefaultCreator) + +#ifdef MACE_ENABLE_OPENCL +#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \ + op_registry->Register( \ + op_type, \ + DeviceType::GPU, \ + DT_FLOAT, \ + OpRegistry::DefaultCreator>) +#else +#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) +#endif + +#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \ + op_registry->Register(builder) + +} // namespace mace + +#endif // MACE_CORE_REGISTRY_OPS_REGISTRY_H_ diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index fa9a58915b4a87a1a0d826180839bd103d515d23..08bf59b055714a95b18b6530ae03e38dfacd4acb 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -46,7 +46,7 @@ bool HasHalfTensor(const NetDef &net_def) { return false; } -template +template void DequantizeTensor(Device *device, const unsigned char *model_data, const ConstTensor &const_tensor, @@ -66,7 +66,8 @@ void DequantizeTensor(Device *device, } // namespace -Workspace::Workspace() = default; +Workspace::Workspace(const OpDelegatorRegistry *registry) : + op_delegator_registry_(registry) {} Tensor *Workspace::CreateTensor(const std::string &name, Allocator *alloc, @@ -144,7 +145,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DataType dst_data_type = const_tensor.data_type(); if (device_type == DeviceType::CPU && - const_tensor.data_type() == DataType::DT_HALF) { + const_tensor.data_type() == DataType::DT_HALF) { dst_data_type = DataType::DT_FLOAT; } else if (!is_quantize_model && const_tensor.quantized()) { if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) { @@ -173,13 +174,13 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, if (device_type == DeviceType::CPU && const_tensor.data_type() == DataType::DT_HALF) { - // uncompress the weights of fp16 - auto org_data = reinterpret_cast( - model_data + const_tensor.offset()); - float *dst_data = tensor->mutable_data(); - for (int i = 0; i < const_tensor.data_size(); ++i) { - dst_data[i] = half_float::half_cast(org_data[i]); - } + // uncompress the weights of fp16 + auto org_data = reinterpret_cast( + model_data + const_tensor.offset()); + float *dst_data = tensor->mutable_data(); + for (int i = 0; i < const_tensor.data_size(); ++i) { + dst_data[i] = half_float::half_cast(org_data[i]); + } } else if (!is_quantize_model && const_tensor.quantized()) { // uncompress the weights of uint8 if (dst_data_type != DT_FLOAT) { @@ -401,4 +402,8 @@ void Workspace::RemoveTensor(const std::string &name) { } } +const OpDelegatorRegistry *Workspace::GetDelegatorRegistry() const { + return op_delegator_registry_; +} + } // namespace mace diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 4308f92477de911e5c9a376bea59064aed1590e6..eae7ebd568140aa7ec4f65730ff15ce0c59fae8c 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -27,13 +27,14 @@ namespace mace { +class OpDelegatorRegistry; class MemoryOptimizer; class Workspace { public: typedef std::map> TensorMap; - Workspace(); + explicit Workspace(const OpDelegatorRegistry *registry); ~Workspace() {} Tensor *CreateTensor(const std::string &name, @@ -71,15 +72,16 @@ class Workspace { void RemoveTensor(const std::string &name); + const OpDelegatorRegistry *GetDelegatorRegistry() const; + private: TensorMap tensor_map_; - std::unique_ptr tensor_buffer_; - PreallocatedPooledAllocator preallocated_allocator_; - bool diffused_buffer_; + const OpDelegatorRegistry *op_delegator_registry_; + MACE_DISABLE_COPY_AND_ASSIGN(Workspace); }; diff --git 
a/mace/libmace/mace.cc b/mace/libmace/mace.cc index b9d3b13c24f1490c688d775f51534c2094c6f377..6ab855f42a0654ec3b8040c27bc66831f7f937af 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -19,8 +19,10 @@ #include "mace/core/device_context.h" #include "mace/core/memory_optimizer.h" #include "mace/core/net.h" -#include "mace/ops/registry/ops_registry.h" +#include "mace/core/registry/ops_registry.h" +#include "mace/core/registry/op_delegator_registry.h" #include "mace/ops/common/transpose.h" +#include "mace/ops/registry/registry.h" #include "mace/utils/math.h" #include "mace/utils/memory.h" #include "mace/utils/stl_util.h" @@ -451,7 +453,8 @@ class MaceEngine::Impl { private: std::unique_ptr model_data_; - std::unique_ptr op_registry_; + std::unique_ptr op_registry_; + std::unique_ptr op_delegator_registry_; DeviceType device_type_; std::unique_ptr device_; std::unique_ptr ws_; @@ -478,9 +481,10 @@ class MaceEngine::Impl { MaceEngine::Impl::Impl(const MaceEngineConfig &config) : model_data_(nullptr), op_registry_(new OpRegistry), + op_delegator_registry_(new OpDelegatorRegistry), device_type_(config.impl_->device_type()), device_(nullptr), - ws_(new Workspace()), + ws_(new Workspace(op_delegator_registry_.get())), net_(nullptr), is_quantized_model_(false), thread_pool_(new utils::ThreadPool(config.impl_->num_threads(), @@ -498,6 +502,8 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) #endif { LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion(); + ops::RegisterAllOps(op_registry_.get()); + ops::RegisterAllOpDelegators(op_delegator_registry_.get()); thread_pool_->Init(); if (device_type_ == DeviceType::CPU) { device_.reset(new CPUDevice(config.impl_->num_threads(), diff --git a/mace/ops/BUILD.bazel b/mace/ops/BUILD.bazel index 9861198aaa49b99dec5302a0c934f2947e39fc7d..52ad46edfde322f45d12becbb249261beed12498 100644 --- a/mace/ops/BUILD.bazel +++ b/mace/ops/BUILD.bazel @@ -22,11 +22,13 @@ cc_library( srcs = glob( [ "common/*.cc", + "delegator/*.cc", ], ), hdrs = glob( [ "common/*.h", + "delegator/*.h", ], ), copts = [ @@ -58,12 +60,16 @@ cc_library( [ "ref/*.cc", ], - ), + ) + if_quantize_enabled(glob([ + "ref/q8/*.cc", + ])), hdrs = glob( [ "ref/*.h", ], - ), + ) + if_quantize_enabled(glob([ + "ref/q8/*.h", + ])), copts = [ "-Werror", "-Wextra", @@ -236,12 +242,12 @@ cc_library( cc_library( name = "ops", - srcs = [ - "registry/ops_registry.cc", - ], - hdrs = [ - "registry/ops_registry.h", - ], + srcs = glob([ + "registry/*.cc", + ]), + hdrs = glob([ + "registry/*.h", + ]), copts = [ "-Werror", "-Wextra", diff --git a/mace/ops/CMakeLists.txt b/mace/ops/CMakeLists.txt index 7994b445a6bc7aabb82f3198c2c2405f857b4e1b..7de9661d61d05cd6e4ac9d551cbccbb38904f7d4 100644 --- a/mace/ops/CMakeLists.txt +++ b/mace/ops/CMakeLists.txt @@ -1,6 +1,10 @@ file(GLOB OPS_COMMON_SRCS common/*.cc) file(GLOB OPS_REF_KERNELS_SRCS ref/*.cc) +file(GLOB OPS_REF_Q8_KERNELS_SRCS + ref/q8/*.cc +) + file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS arm/fp32/*.cc ) @@ -17,20 +21,23 @@ file(GLOB OPS_OPENCL_KERNELS_SRCS file(GLOB OPS_INTERNAL_OPS_SRCS *.cc) -set(OPS_SRCS registry/ops_registry.cc) +set(OPS_SRCS registry/ops_registry.cc registry/op_delegators_registry.cc) set(OPS_SRCS ${OPS_SRCS} ${OPS_COMMON_SRCS}) set(OPS_SRCS ${OPS_SRCS} ${OPS_INTERNAL_OPS_SRCS}) # TODO we need to remove this in production build set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_KERNELS_SRCS}) +if(MACE_ENABLE_QUANTIZE) + set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_Q8_KERNELS_SRCS}) +endif(MACE_ENABLE_QUANTIZE) + if(MACE_ENABLE_NEON) 
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS}) + if(MACE_ENABLE_QUANTIZE) + set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS}) + endif(MACE_ENABLE_QUANTIZE) endif(MACE_ENABLE_NEON) -if(MACE_ENABLE_QUANTIZE) - set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS}) -endif(MACE_ENABLE_QUANTIZE) - if(MACE_ENABLE_OPENCL) set(OPS_SRCS ${OPS_SRCS} ${OPS_OPENCL_KERNELS_SRCS}) endif(MACE_ENABLE_OPENCL) diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index 255370568b6eb7a8702900b85b0e2c99d4606a6b..338de7ead4db24e35169bdc6cd681729e84b15b8 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -17,13 +17,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#endif +#include "mace/ops/delegator/activation.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -37,19 +34,20 @@ namespace ops { template class ActivationOp; -template<> -class ActivationOp : public Operation { +template +class ActivationOp : public Operation { public: explicit ActivationOp(OpConstructContext *context) : Operation(context), activation_type_(ops::StringToActivationType( - Operation::GetOptionalArg("activation", - "NOOP"))), - activation_delegator_(activation_type_, - Operation::GetOptionalArg("max_limit", - 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + Operation::GetOptionalArg("activation", "NOOP"))), + activation_delegator_(delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, T, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + activation_type_, + Operation::GetOptionalArg("max_limit", 0), + Operation::GetOptionalArg("leakyrelu_coefficient", 0)))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -58,28 +56,24 @@ class ActivationOp : public Operation { if (activation_type_ == PRELU) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); - const float *input_ptr = input->data(); - float *output_ptr = output->mutable_data(); + const T *input_ptr = input->data(); + T *output_ptr = output->mutable_data(); MACE_CHECK(this->InputSize() > 1); const Tensor *alpha = this->Input(1); - const float *alpha_ptr = alpha->data(); + const T *alpha_ptr = alpha->data(); const index_t outer_size = output->dim(0); const index_t inner_size = output->dim(2) * output->dim(3); PReLUActivation(context, input_ptr, outer_size, input->dim(1), inner_size, alpha_ptr, output_ptr); } else { - activation_delegator_.Compute(context, input, output); + activation_delegator_->Compute(context, input, output); } return MaceStatus::MACE_SUCCESS; } private: ActivationType activation_type_; -#if defined(MACE_ENABLE_NEON) - arm::fp32::Activation activation_delegator_; -#else - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -122,7 +116,7 @@ class ActivationOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterActivation(OpRegistryBase *op_registry) { +void RegisterActivation(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Activation", ActivationOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Activation", ActivationOp); diff --git a/mace/ops/activation.h b/mace/ops/activation.h index 9ceae6e07ff983e5c577406d60b6616c56da4fc3..4003dd309331a59d64c2ff6ace5299e7cc9587a6 100644 
--- a/mace/ops/activation.h +++ b/mace/ops/activation.h @@ -20,7 +20,7 @@ #include #include "mace/core/types.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/common/activation_type.h" #include "mace/utils/logging.h" diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 5b98ba8554caa69929adacefe27b94499d274cd9..8e5ce2e1928a1244ccd0ee27a3aa8c9bdc7a5ec7 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -19,7 +19,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/addn.h" @@ -92,7 +93,7 @@ class AddNOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterAddN(OpRegistryBase *op_registry) { +void RegisterAddN(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "AddN", AddNOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "AddN", AddNOp); MACE_REGISTER_OP_CONDITION( diff --git a/mace/ops/argmax.cc b/mace/ops/argmax.cc index 32007d6ccbcd59cd78670ad7f46aced4a3e6fa4c..5ec9dc92b818196b53ba60c0886467f5f2618bb4 100644 --- a/mace/ops/argmax.cc +++ b/mace/ops/argmax.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -109,7 +110,7 @@ class ArgMaxOp : public Operation { -void RegisterArgMax(OpRegistryBase *op_registry) { +void RegisterArgMax(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ArgMax", ArgMaxOp, DeviceType::CPU, float); } diff --git a/mace/ops/arm/fp32/activation.cc b/mace/ops/arm/fp32/activation.cc index cac3badb523262663820b93e2527588f49be4923..8c66bd563093a20941c64a50faa2a68aad891710 100644 --- a/mace/ops/arm/fp32/activation.cc +++ b/mace/ops/arm/fp32/activation.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/ops/arm/fp32/activation.h" +#include "mace/ops/delegator/activation.h" #include #include @@ -22,16 +22,22 @@ namespace ops { namespace arm { namespace fp32 { -Activation::Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient) - : type_(type), - limit_(limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} +class Activation : public delegator::Activation { + public: + explicit Activation(const delegator::ActivationParam ¶m) + : delegator::Activation(param) {} + ~Activation() = default; + + MaceStatus Compute(const OpContext *context, + const Tensor *input, Tensor *output) override; + + private: + void DoActivation(const OpContext *context, + const Tensor *input, Tensor *output); +}; MaceStatus Activation::Compute(const OpContext *context, - const Tensor *input, - Tensor *output) { + const Tensor *input, Tensor *output) { Tensor::MappingGuard input_guard(input); if (input != output) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); @@ -139,7 +145,7 @@ void Activation::DoActivation(const OpContext *context, // remain for (index_t i = block_count * 4; i < size; ++i) { output_data[i] = std::max(input_data[i], 0.f) + - std::min(input_data[i], 0.f) * leakyrelu_coefficient_; + std::min(input_data[i], 0.f) * leakyrelu_coefficient_; } break; @@ -169,14 +175,19 @@ void Activation::DoActivation(const OpContext *context, break; } - case NOOP: + case NOOP: { break; + } - default: + default: { MACE_NOT_IMPLEMENTED; + } } } +MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam, + MACE_DELEGATOR_KEY(Activation, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/activation.h b/mace/ops/arm/fp32/activation.h deleted file mode 100644 index 265915d0c3a8d3bdbab3e4c0d0f60521730dec34..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/activation.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_ACTIVATION_H_ -#define MACE_OPS_ARM_FP32_ACTIVATION_H_ - -#include "mace/core/op_context.h" -#include "mace/ops/common/activation_type.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Activation { - public: - explicit Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient); - ~Activation() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - Tensor *output); - - private: - void DoActivation(const OpContext *context, - const Tensor *input, - Tensor *output); - - ActivationType type_; - const float limit_; - const float leakyrelu_coefficient_; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_ACTIVATION_H_ diff --git a/mace/ops/arm/fp32/bias_add.cc b/mace/ops/arm/fp32/bias_add.cc index f572b22bbd1cfe80b39ff7e2c76727aa8b437fac..fc5a55b3d4d0abf6cdad15bfd540bb20446803af 100644 --- a/mace/ops/arm/fp32/bias_add.cc +++ b/mace/ops/arm/fp32/bias_add.cc @@ -12,15 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/bias_add.h" - #include +#include "mace/ops/delegator/bias_add.h" namespace mace { namespace ops { namespace arm { namespace fp32 { +class BiasAdd : public delegator::BiasAdd { + public: + explicit BiasAdd(const DelegatorParam ¶m) : delegator::BiasAdd(param) {} + ~BiasAdd() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output) override; + + private: + void AddBias(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output); +}; + MaceStatus BiasAdd::Compute(const OpContext *context, const Tensor *input, const Tensor *bias, @@ -117,6 +129,9 @@ void BiasAdd::AddBias(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, BiasAdd, DelegatorParam, + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/bias_add.h b/mace/ops/arm/fp32/bias_add.h deleted file mode 100644 index a3e6849157472bc9df8117299cf3f0d01ca203d8..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/bias_add.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_BIAS_ADD_H_ -#define MACE_OPS_ARM_FP32_BIAS_ADD_H_ - -#include "mace/core/op_context.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class BiasAdd { - public: - BiasAdd() = default; - ~BiasAdd() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); - - private: - void AddBias(const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_BIAS_ADD_H_ diff --git a/mace/ops/arm/fp32/conv_2d.h b/mace/ops/arm/fp32/conv_2d.h index dc8d0effd101e77df88473c884fcdb670768379e..a143f5f84c2092c614d60576e27e26ec69d7e3a3 100644 --- a/mace/ops/arm/fp32/conv_2d.h +++ b/mace/ops/arm/fp32/conv_2d.h @@ -18,36 +18,25 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/conv_2d.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Conv2dBase { +class Conv2dBase : public delegator::Conv2d { public: - Conv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2dBase(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} virtual ~Conv2dBase() = default; - virtual MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) = 0; - protected: void CalOutputShapeAndInputPadSize(const std::vector &input_shape, const std::vector &filter_shape, @@ -83,11 +72,6 @@ class Conv2dBase { const int pad_left, Tensor *dst); void UnPadOutput(const Tensor &src, Tensor *dst); - - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; }; } // namespace fp32 diff --git a/mace/ops/arm/fp32/conv_2d_1x1.cc b/mace/ops/arm/fp32/conv_2d_1x1.cc index d5e03652bbd25bad8eb43bfb67b2ef98092b9b2f..0aad6be90729aac36bd09d1f9a3bea57ddb82b8b 100644 --- a/mace/ops/arm/fp32/conv_2d_1x1.cc +++ b/mace/ops/arm/fp32/conv_2d_1x1.cc @@ -12,13 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/ops/arm/fp32/conv_2d_1x1.h" +#include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/arm/fp32/gemm.h" +#include "mace/ops/delegator/conv_2d.h" namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dK1x1 : public Conv2dBase { + public: + explicit Conv2dK1x1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param), + gemm_(delegator::GemmParam()) {} + virtual ~Conv2dK1x1() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; + + private: + Gemm gemm_; +}; + MaceStatus Conv2dK1x1::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -94,6 +113,9 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context, output); } +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_1x1.h b/mace/ops/arm/fp32/conv_2d_1x1.h deleted file mode 100644 index cde94ea01927ad544bb347eaea53bcb55b01f7f8..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/conv_2d_1x1.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ -#define MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/gemm.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dK1x1 : public Conv2dBase { - public: - Conv2dK1x1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} - virtual ~Conv2dK1x1() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; - - private: - Gemm gemm_; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_2D_1X1_H_ diff --git a/mace/ops/arm/fp32/conv_2d_1xn.cc b/mace/ops/arm/fp32/conv_2d_1xn.cc index 3be9e3eb5dca7ecf4ecf66b1371796872c5cd0b5..fc92091f55edf6f9d9eac7a6a285f718d62034e0 100644 --- a/mace/ops/arm/fp32/conv_2d_1xn.cc +++ b/mace/ops/arm/fp32/conv_2d_1xn.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -859,6 +861,19 @@ MaceStatus Conv2dK15x1S1::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x7S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x7S1)) + +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x1S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x1S1)) + +MACE_REGISTER_DELEGATOR(registry, Conv2dK1x15S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + NEON, K1x15S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK15x1S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + NEON, K15x1S1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_1xn.h b/mace/ops/arm/fp32/conv_2d_1xn.h index 0bdd66737907627f7dd44e1cb94c24803ea0c8fc..c0a6da637e3ecffd74da458c71730a8646e365c3 100644 --- a/mace/ops/arm/fp32/conv_2d_1xn.h +++ b/mace/ops/arm/fp32/conv_2d_1xn.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_1XN_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK1x7S1 : public Conv2dBase { public: - Conv2dK1x7S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK1x7S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK1x7S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK1x7S1 : public Conv2dBase { class Conv2dK7x1S1 : public Conv2dBase { public: - Conv2dK7x1S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x1S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x1S1() {} MaceStatus Compute( @@ -54,8 +55,8 @@ class Conv2dK7x1S1 : public Conv2dBase { class Conv2dK1x15S1 : public Conv2dBase { public: - Conv2dK1x15S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK1x15S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual 
~Conv2dK1x15S1() {} MaceStatus Compute( @@ -67,8 +68,8 @@ class Conv2dK1x15S1 : public Conv2dBase { class Conv2dK15x1S1 : public Conv2dBase { public: - Conv2dK15x1S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK15x1S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK15x1S1() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_2d_3x3.cc b/mace/ops/arm/fp32/conv_2d_3x3.cc index 95c3034138d9ecab67d1aae0ee770ff07ab20788..37d8ef849f73e53d4afebc55ac19efe50fe7c02b 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3.cc +++ b/mace/ops/arm/fp32/conv_2d_3x3.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -735,6 +737,11 @@ MaceStatus Conv2dK3x3S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S2, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_3x3.h b/mace/ops/arm/fp32/conv_2d_3x3.h index bd96501d98f32ebe9ffe0bad98cccee67bc0b062..e64d061e3e6103f78901c144d9866d047e8dfc96 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3.h +++ b/mace/ops/arm/fp32/conv_2d_3x3.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_3X3_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK3x3S1 : public Conv2dBase { public: - Conv2dK3x3S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK3x3S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK3x3S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK3x3S1 : public Conv2dBase { class Conv2dK3x3S2 : public Conv2dBase { public: - Conv2dK3x3S2(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK3x3S2(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc b/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc index ab2517bf6295691de4ba00fd22d9e651e1e13fee..cbdb7d66443e5d47759dcb8fe44890f85f2c4d5a 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc +++ b/mace/ops/arm/fp32/conv_2d_3x3_winograd.cc @@ -17,6 +17,7 @@ #include #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/conv_2d.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -800,6 +801,10 @@ void Conv2dK3x3Winograd::TransformOutput8x8(const OpContext *context, }, 0, batch, 1, 0, out_channels, 1); } +MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3Winograd, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX( + Conv2d, CPU, float, NEON, K3x3Winograd)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_3x3_winograd.h b/mace/ops/arm/fp32/conv_2d_3x3_winograd.h index 53118a6aea3b2d8d3a75b08fa5d0b0f84ef69203..ec4db81bb2d552615430b81e330ef0ff862c563f 100644 --- a/mace/ops/arm/fp32/conv_2d_3x3_winograd.h +++ 
b/mace/ops/arm/fp32/conv_2d_3x3_winograd.h @@ -18,11 +18,11 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/arm/fp32/gemm.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -31,10 +31,9 @@ namespace fp32 { class Conv2dK3x3Winograd : public Conv2dBase { public: - Conv2dK3x3Winograd(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type), - gemm_(), + explicit Conv2dK3x3Winograd(const delegator::Conv2dParam ¶m) + : Conv2dBase(param), + gemm_(delegator::GemmParam()), transformed_filter_(nullptr), out_tile_size_(0) {} diff --git a/mace/ops/arm/fp32/conv_2d_5x5.cc b/mace/ops/arm/fp32/conv_2d_5x5.cc index 1b41ec7ccd87a14e5683e1f84bc6f967e159b5b3..cc117cf98637b2f886007ae15ffe75d47f884ff0 100644 --- a/mace/ops/arm/fp32/conv_2d_5x5.cc +++ b/mace/ops/arm/fp32/conv_2d_5x5.cc @@ -12,16 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/conv_2d_5x5.h" - #include #include +#include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dK5x5S1 : public Conv2dBase { + public: + explicit Conv2dK5x5S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} + virtual ~Conv2dK5x5S1() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; +}; + #define MACE_Conv2dNeonK5x5SnLoadCalc4 \ /* load filter (4 outch x 1 height x 4 width) */ \ float32x4_t vf00, vf10, vf20, vf30; \ @@ -244,6 +258,9 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK5x5S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K5x5S1)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_5x5.h b/mace/ops/arm/fp32/conv_2d_5x5.h deleted file mode 100644 index b6fdf9bbda9d7edc7593a08e30ce6f30987de2a4..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/conv_2d_5x5.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ -#define MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dK5x5S1 : public Conv2dBase { - public: - Conv2dK5x5S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} - virtual ~Conv2dK5x5S1() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; -}; - - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_2D_5X5_H_ diff --git a/mace/ops/arm/fp32/conv_2d_7x7.cc b/mace/ops/arm/fp32/conv_2d_7x7.cc index 4ee8a045a8c61e72fb615816af0fc9c52b77f9b9..cc6963e7b1b8cd7eda4a09cb74a57d5f5ac3b6b2 100644 --- a/mace/ops/arm/fp32/conv_2d_7x7.cc +++ b/mace/ops/arm/fp32/conv_2d_7x7.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { @@ -720,6 +722,13 @@ MaceStatus Conv2dK7x7S3::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S1, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S1)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S2, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S2)) +MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S3, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S3)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_2d_7x7.h b/mace/ops/arm/fp32/conv_2d_7x7.h index 9324f4daac2392cb069935d3d46fc36274e8b8ea..0d0467fc5b38a354bab744503dafbe28b5f180f3 100644 --- a/mace/ops/arm/fp32/conv_2d_7x7.h +++ b/mace/ops/arm/fp32/conv_2d_7x7.h @@ -16,10 +16,11 @@ #define MACE_OPS_ARM_FP32_CONV_2D_7X7_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,8 +29,8 @@ namespace fp32 { class Conv2dK7x7S1 : public Conv2dBase { public: - Conv2dK7x7S1(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S1(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S1() {} MaceStatus Compute( @@ -41,8 +42,8 @@ class Conv2dK7x7S1 : public Conv2dBase { class Conv2dK7x7S2 : public Conv2dBase { public: - Conv2dK7x7S2(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S2(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S2() {} MaceStatus Compute( @@ -54,8 +55,8 @@ class Conv2dK7x7S2 : public Conv2dBase { class Conv2dK7x7S3 : public Conv2dBase { public: - Conv2dK7x7S3(const std::vector &paddings, const Padding padding_type) - : Conv2dBase({3, 3}, {1, 1}, paddings, padding_type) {} + explicit Conv2dK7x7S3(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} virtual ~Conv2dK7x7S3() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/conv_general.cc b/mace/ops/arm/fp32/conv_general.cc index 25fb2441481cb5ac55da78e44327478b513de018..2fdc57e2ef7d9e0f029919249a0bb776d5183879 100644 
--- a/mace/ops/arm/fp32/conv_general.cc +++ b/mace/ops/arm/fp32/conv_general.cc @@ -12,15 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/arm/fp32/conv_general.h" +#include "mace/ops/arm/fp32/conv_2d.h" #include +#include "mace/ops/delegator/conv_2d.h" + namespace mace { namespace ops { namespace arm { namespace fp32 { +class Conv2dGeneral : public Conv2dBase { + public: + explicit Conv2dGeneral(const delegator::Conv2dParam ¶m) + : Conv2dBase(param) {} + virtual ~Conv2dGeneral() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) override; +}; + MaceStatus Conv2dGeneral::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -237,6 +252,10 @@ MaceStatus Conv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, Conv2dGeneral, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/conv_general.h b/mace/ops/arm/fp32/conv_general.h deleted file mode 100644 index 115acdb3fe83cb80e1e20e7939c5fe03eed7c6da..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/conv_general.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_CONV_GENERAL_H_ -#define MACE_OPS_ARM_FP32_CONV_GENERAL_H_ - -#include -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/conv_2d.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Conv2dGeneral : public Conv2dBase { - public: - Conv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : Conv2dBase(strides, dilations, paddings, padding_type) {} - virtual ~Conv2dGeneral() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - Tensor *output) override; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_CONV_GENERAL_H_ diff --git a/mace/ops/arm/fp32/deconv_2d.h b/mace/ops/arm/fp32/deconv_2d.h index 554f2935992d0a6f901bbb7b40aab4b048d63616..128d5858beee4a8530ed3f775536fb3d1652c44b 100644 --- a/mace/ops/arm/fp32/deconv_2d.h +++ b/mace/ops/arm/fp32/deconv_2d.h @@ -18,54 +18,27 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Deconv2dBase { +class Deconv2dBase : public delegator::Deconv2d { public: - Deconv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} - - Deconv2dBase(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - 1, - framework_type) {} + explicit Deconv2dBase(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param), + group_(param.group_) {} virtual ~Deconv2dBase() = default; - virtual MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - const Tensor *output_shape, - Tensor *output) = 0; - protected: MaceStatus ResizeOutAndPadOut(const OpContext *context, const Tensor *input, @@ -78,13 +51,7 @@ class Deconv2dBase { void UnPadOutput(const Tensor &src, const std::vector &out_pad_size, Tensor *dst); - - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; index_t group_; - const FrameworkType framework_type_; }; } // namespace fp32 diff --git a/mace/ops/arm/fp32/deconv_2d_2x2.cc b/mace/ops/arm/fp32/deconv_2d_2x2.cc index c9d630bbb63c66d72684663659965e32b2be6b60..65cfc6e8d7020e1fd753cbed9a2e7416b1ff56b9 100644 --- a/mace/ops/arm/fp32/deconv_2d_2x2.cc +++ b/mace/ops/arm/fp32/deconv_2d_2x2.cc @@ -330,12 +330,18 @@ MaceStatus Deconv2dK2x2S2::Compute(const OpContext *context, } }, 0, batch, 1, 0, outch, 1); - UnPadOutput(*out_tensor, out_pad_size, output); return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, 
K2x2S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K2x2S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_2x2.h b/mace/ops/arm/fp32/deconv_2d_2x2.h index 05f80dece27fd6cf20d87861e04a512b94706939..6fd533444a2e1a1e910c2d527987112940ddb4cc 100644 --- a/mace/ops/arm/fp32/deconv_2d_2x2.h +++ b/mace/ops/arm/fp32/deconv_2d_2x2.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK2x2S1 : public Deconv2dBase { public: - Deconv2dK2x2S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK2x2S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK2x2S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK2x2S1 : public Deconv2dBase { class Deconv2dK2x2S2 : public Deconv2dBase { public: - Deconv2dK2x2S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK2x2S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK2x2S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/deconv_2d_3x3.cc b/mace/ops/arm/fp32/deconv_2d_3x3.cc index b2ef6eae269316c9169e33bbb753606d8572c1ff..55911e25f432a21290295018eefacedb00cfd25d 100644 --- a/mace/ops/arm/fp32/deconv_2d_3x3.cc +++ b/mace/ops/arm/fp32/deconv_2d_3x3.cc @@ -464,6 +464,13 @@ MaceStatus Deconv2dK3x3S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_3x3.h b/mace/ops/arm/fp32/deconv_2d_3x3.h index 4495cbe8e4ef5fa3b05c72e9970fa05fb67a7fbb..65cc23e6f365d9809d983c94bc12855760046a17 100644 --- a/mace/ops/arm/fp32/deconv_2d_3x3.h +++ b/mace/ops/arm/fp32/deconv_2d_3x3.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK3x3S1 : public Deconv2dBase { public: - Deconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK3x3S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK3x3S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK3x3S1 : public Deconv2dBase { class Deconv2dK3x3S2 : public Deconv2dBase { public: - 
Deconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK3x3S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/deconv_2d_4x4.cc b/mace/ops/arm/fp32/deconv_2d_4x4.cc index 3c47ecff71bc46ea02aa73cb49d511a22c61ba27..b2e17afa75f2545d820722ad90b3297397941a56 100644 --- a/mace/ops/arm/fp32/deconv_2d_4x4.cc +++ b/mace/ops/arm/fp32/deconv_2d_4x4.cc @@ -449,7 +449,6 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context, const index_t outw = out_shape[3]; const index_t out_img_size = outh * outw; - utils::ThreadPool &thread_pool = context->device()->cpu_runtime()->thread_pool(); @@ -575,6 +574,13 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S1, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S2, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, K4x4S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_4x4.h b/mace/ops/arm/fp32/deconv_2d_4x4.h index 9f09056af0224331fca8815cca18a1f7eecdd1cc..bf86a62ab4575ef20072dc6f1fd648f2bd65da14 100644 --- a/mace/ops/arm/fp32/deconv_2d_4x4.h +++ b/mace/ops/arm/fp32/deconv_2d_4x4.h @@ -18,12 +18,12 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,10 +32,8 @@ namespace fp32 { class Deconv2dK4x4S1 : public Deconv2dBase { public: - Deconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK4x4S1(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK4x4S1() {} MaceStatus Compute( @@ -48,10 +46,8 @@ class Deconv2dK4x4S1 : public Deconv2dBase { class Deconv2dK4x4S2 : public Deconv2dBase { public: - Deconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {} + explicit Deconv2dK4x4S2(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~Deconv2dK4x4S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/deconv_2d_general.cc b/mace/ops/arm/fp32/deconv_2d_general.cc index 47bfe39cf27adac58b1240afa66390fc23dc8866..5ffe7b0d7a25bf92824ee1120e65ede9b50fcc08 100644 --- a/mace/ops/arm/fp32/deconv_2d_general.cc +++ b/mace/ops/arm/fp32/deconv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
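Editor's sketch: the deconv hunks above all follow one registration pattern, where a kernel-specific class takes a single `delegator::Deconv2dParam` and binds itself to a string key with `MACE_REGISTER_DELEGATOR`. The sketch below illustrates that pattern using the hypothetical tile size `K5x5S1`, which is not part of this patch; the declaration and registration are shown together for brevity, and the Compute body is omitted.

```c++
// Illustrative only: Deconv2dK5x5S1 does not exist in this patch; it shows the
// shape of a kernel-specific NEON deconv delegator after the refactor.
// In the patch itself, MACE_REGISTER_DELEGATOR sits in the .cc next to the
// Compute() definition rather than in the header.
#include "mace/ops/arm/fp32/deconv_2d.h"

namespace mace {
namespace ops {
namespace arm {
namespace fp32 {

class Deconv2dK5x5S1 : public Deconv2dBase {
 public:
  // Strides, dilations, paddings, group and framework type all arrive through
  // one param object instead of a long constructor argument list.
  explicit Deconv2dK5x5S1(const delegator::Deconv2dParam &param)
      : Deconv2dBase(param) {}
  virtual ~Deconv2dK5x5S1() {}

  MaceStatus Compute(const OpContext *context,
                     const Tensor *input,
                     const Tensor *filter,
                     const Tensor *output_shape,
                     Tensor *output) override;  // definition omitted here
};

// Binds the class to the key that Deconv2dOp looks up at runtime.
MACE_REGISTER_DELEGATOR(registry, Deconv2dK5x5S1, delegator::Deconv2dParam,
                        MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
                                              NEON, K5x5S1))

}  // namespace fp32
}  // namespace arm
}  // namespace ops
}  // namespace mace
```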
-#include "mace/ops/arm/fp32/deconv_2d_general.h" +#include "mace/ops/arm/fp32/deconv_2d.h" // TODO(liutuo): optimize it @@ -21,6 +21,20 @@ namespace ops { namespace arm { namespace fp32 { +class Deconv2dGeneral : public Deconv2dBase { + public: + explicit Deconv2dGeneral(const delegator::Deconv2dParam ¶m) + : Deconv2dBase(param) {} + virtual ~Deconv2dGeneral() {} + + MaceStatus Compute( + const OpContext *context, + const Tensor *input, + const Tensor *filter, + const Tensor *output_shape, + Tensor *output) override; +}; + MaceStatus Deconv2dGeneral::Compute(const OpContext *context, const Tensor *input, const Tensor *filter, @@ -110,6 +124,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Deconv2dGeneral, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/deconv_2d_general.h b/mace/ops/arm/fp32/deconv_2d_general.h deleted file mode 100644 index d11ada030c02c4f155aec12e0a162513cdae0c25..0000000000000000000000000000000000000000 --- a/mace/ops/arm/fp32/deconv_2d_general.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ -#define MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ - -#include -#include - -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/types.h" -#include "mace/core/op_context.h" -#include "mace/ops/arm/fp32/deconv_2d.h" -#include "mace/ops/common/conv_pool_2d_util.h" - -namespace mace { -namespace ops { -namespace arm { -namespace fp32 { - -class Deconv2dGeneral : public Deconv2dBase { - public: - Deconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - framework_type) {} - virtual ~Deconv2dGeneral() {} - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *filter, - const Tensor *output_shape, - Tensor *output) override; -}; - -} // namespace fp32 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_ diff --git a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc index a27827b471818c049a09e532c059b56396e8f452..8d77672b7ab094771e067722f703e8bc0e27a6d1 100644 --- a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc +++ b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.cc @@ -512,6 +512,13 @@ MaceStatus DepthwiseConv2dK3x3S2::Compute(const mace::OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dK3x3S1, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dK3x3S2, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h index c130fbffd361dfb33be9974b3d603e630cb80979..49412b808dde686c26fff1b80137ab86c78d65f9 100644 --- a/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h +++ b/mace/ops/arm/fp32/depthwise_conv_2d_3x3.h @@ -16,10 +16,12 @@ #define MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_ #include -#include "mace/public/mace.h" + +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/conv_2d.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -28,9 +30,8 @@ namespace fp32 { class DepthwiseConv2dK3x3S1 : public Conv2dBase { public: - DepthwiseConv2dK3x3S1(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {} + explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam ¶m) + : Conv2dBase(param) {} virtual ~DepthwiseConv2dK3x3S1() {} MaceStatus Compute( @@ -42,9 +43,8 @@ class DepthwiseConv2dK3x3S1 : public Conv2dBase { class DepthwiseConv2dK3x3S2 : public Conv2dBase { public: - DepthwiseConv2dK3x3S2(const std::vector &paddings, - const Padding padding_type) - : Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {} + explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam ¶m) + : Conv2dBase(param) {} virtual ~DepthwiseConv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc index 3cd6d527b7f1fa67d053cc96dea8ae6505e32352..291075ae2205d61035e211fd1c8daa04bec8c9d5 100644 --- 
a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.cc @@ -776,6 +776,20 @@ MaceStatus GroupDeconv2dK3x3S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK3x3S1, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK3x3S2, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S2)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK3x3S1, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S1)) +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK3x3S2, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h index 5dd315a47ad5e0c9a815b64ca3c5c0de63faf25e..eeb21d6c3c5d50502b268e61f3b0726066a963cb 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,14 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase { public: - DepthwiseDeconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK3x3S1( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK3x3S1() {} MaceStatus Compute( @@ -52,14 +48,9 @@ class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase { class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase { public: - DepthwiseDeconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK3x3S2( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK3x3S2() {} MaceStatus Compute( @@ -72,16 +63,9 @@ class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase { class GroupDeconv2dK3x3S1 : public Deconv2dBase { public: - GroupDeconv2dK3x3S1(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK3x3S1( + const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK3x3S1() {} MaceStatus Compute( @@ -94,16 +78,8 @@ class GroupDeconv2dK3x3S1 : public Deconv2dBase { class GroupDeconv2dK3x3S2 : public Deconv2dBase { public: - GroupDeconv2dK3x3S2(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK3x3S2(const 
delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK3x3S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc index 85c93b0cef7b53dc170d48eeaa6c65154f85c8e8..f9de2de3df27aeabb4eb9199140993fbd5abb31e 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.cc @@ -959,6 +959,20 @@ MaceStatus GroupDeconv2dK4x4S2::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK4x4S1, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dK4x4S2, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S2)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK4x4S1, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S1)) +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dK4x4S2, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S2)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h index 4b73ed010afdd783f45e39d638db01427070e717..31d5bd99ed5cfe287026f99ac89d3721c7fed8bb 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,14 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase { public: - DepthwiseDeconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK4x4S1( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK4x4S1() {} MaceStatus Compute( @@ -52,14 +48,9 @@ class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase { class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase { public: - DepthwiseDeconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dK4x4S2( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dK4x4S2() {} MaceStatus Compute( @@ -72,16 +63,8 @@ class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase { class GroupDeconv2dK4x4S1 : public Deconv2dBase { public: - GroupDeconv2dK4x4S1(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({1, 1}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK4x4S1() {} MaceStatus Compute( @@ -94,16 +77,8 @@ class GroupDeconv2dK4x4S1 : public Deconv2dBase { class GroupDeconv2dK4x4S2 : public 
Deconv2dBase { public: - GroupDeconv2dK4x4S2(const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase({2, 2}, - {1, 1}, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dK4x4S2() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc b/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc index a45d5acc6a663d370f1b741b5b15598c9fd40e22..81d715e26dbb34186bcd873b9dc083b27cd1a352 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_general.cc @@ -207,6 +207,14 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dGeneral, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, General)) + +MACE_REGISTER_DELEGATOR( + registry, GroupDeconv2dGeneral, delegator::GroupDeconv2dParam, + MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, General)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/depthwise_deconv_2d_general.h b/mace/ops/arm/fp32/depthwise_deconv_2d_general.h index d73480c5ea1a4fff7aa06656efb9a964acc1b01d..924924498301592de6dd1c9af6473eb61d289407 100644 --- a/mace/ops/arm/fp32/depthwise_deconv_2d_general.h +++ b/mace/ops/arm/fp32/depthwise_deconv_2d_general.h @@ -18,12 +18,13 @@ #include #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { @@ -32,16 +33,9 @@ namespace fp32 { class DepthwiseDeconv2dGeneral : public Deconv2dBase { public: - DepthwiseDeconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - framework_type) {} + explicit DepthwiseDeconv2dGeneral( + const delegator::DepthwiseDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~DepthwiseDeconv2dGeneral() {} MaceStatus Compute( @@ -54,18 +48,8 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase { class GroupDeconv2dGeneral : public Deconv2dBase { public: - GroupDeconv2dGeneral(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const int group, - const FrameworkType framework_type) - : Deconv2dBase(strides, - dilations, - paddings, - padding_type, - group, - framework_type) {} + explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam ¶m) + : Deconv2dBase(param) {} virtual ~GroupDeconv2dGeneral() {} MaceStatus Compute( diff --git a/mace/ops/arm/fp32/gemm.cc b/mace/ops/arm/fp32/gemm.cc index ff26052ffae16a064f4873151ef675c83d1ecbb3..ca429e63d544e13774eb4073c02e9fd6122ad499 100644 --- a/mace/ops/arm/fp32/gemm.cc +++ b/mace/ops/arm/fp32/gemm.cc @@ -1224,6 +1224,9 @@ MaceStatus Gemm::Compute(const OpContext *context, output); } +MACE_REGISTER_DELEGATOR(registry, Gemm, delegator::GemmParam, + MACE_DELEGATOR_KEY(Gemm, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // 
namespace ops diff --git a/mace/ops/arm/fp32/gemm.h b/mace/ops/arm/fp32/gemm.h index 00b4d80eef4bf27f98c54f1c77a51765cc7f530d..4910ae358347bf94eef076e63934f9365aa1ef79 100644 --- a/mace/ops/arm/fp32/gemm.h +++ b/mace/ops/arm/fp32/gemm.h @@ -15,10 +15,11 @@ #ifndef MACE_OPS_ARM_FP32_GEMM_H_ #define MACE_OPS_ARM_FP32_GEMM_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/matrix.h" +#include "mace/ops/delegator/gemm.h" +#include "mace/public/mace.h" #include "mace/utils/math.h" // This implements matrix-matrix multiplication. @@ -29,13 +30,12 @@ namespace ops { namespace arm { namespace fp32 { -class Gemm { +class Gemm : public delegator::Gemm { public: - explicit Gemm(const bool should_cache_pack) - : pack_cache_(GetCPUAllocator()), - should_cache_pack_(should_cache_pack), + explicit Gemm(const delegator::GemmParam ¶m) + : delegator::Gemm(param), pack_cache_(GetCPUAllocator()), + should_cache_pack_(param.should_cache_pack_), cached_(0) {} - Gemm() : Gemm(false) {} ~Gemm() {} MaceStatus Compute( @@ -51,7 +51,7 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; // Original matrix before transpose has row-major MaceStatus Compute( @@ -68,7 +68,7 @@ class Gemm { const bool transpose_out, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; private: void ComputeBlock(const float *packed_lhs_data, diff --git a/mace/ops/arm/fp32/gemv.cc b/mace/ops/arm/fp32/gemv.cc index 2f2866cf0da86dd70402d28810247821f229d85b..317e422404327f50b6874993a2ed10f76a000e87 100644 --- a/mace/ops/arm/fp32/gemv.cc +++ b/mace/ops/arm/fp32/gemv.cc @@ -378,6 +378,10 @@ MaceStatus Gemv::Compute(const OpContext *context, #undef vaddvq_f32 #endif + +MACE_REGISTER_DELEGATOR(registry, Gemv, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, float, NEON)) + } // namespace fp32 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/fp32/gemv.h b/mace/ops/arm/fp32/gemv.h index 1f406426fbe93ae965f23450eca2a5ba1c517db1..9933cf42b817e20945517588a87dfca2232e7411 100644 --- a/mace/ops/arm/fp32/gemv.h +++ b/mace/ops/arm/fp32/gemv.h @@ -15,18 +15,19 @@ #ifndef MACE_OPS_ARM_FP32_GEMV_H_ #define MACE_OPS_ARM_FP32_GEMV_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace arm { namespace fp32 { -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -39,7 +40,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; } // namespace fp32 diff --git a/mace/ops/arm/q8/eltwise.cc b/mace/ops/arm/q8/eltwise.cc index bdaa57a640ec6e6d66cd080830211b95c4ceb5b5..74d44104c422f555ee9e5b18ab5647aba9c7f2bd 100644 --- a/mace/ops/arm/q8/eltwise.cc +++ b/mace/ops/arm/q8/eltwise.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
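Editor's sketch: on the op side, these Gemm/Gemv kernels are no longer instantiated directly; an op asks the delegator registry for them by key, mirroring the `Activation`/`BiasAdd` usage elsewhere in this patch. A minimal sketch follows, assuming the `Create()` signature shown in `batch_norm.cc` and `bias_add.cc` below; `MyGemvUserOp` is a hypothetical op used only for illustration.

```c++
// Sketch only: how an op obtains the registered Gemv kernel instead of
// holding an arm::fp32::Gemv member directly.
#include <memory>

#include "mace/core/ops/operator.h"
#include "mace/ops/delegator/gemv.h"

namespace mace {
namespace ops {

class MyGemvUserOp : public Operation {  // hypothetical op
 public:
  explicit MyGemvUserOp(OpConstructContext *context)
      : Operation(context),
        gemv_(delegator::Gemv::Create(
            context->workspace(),
            MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE),
            DelegatorParam())) {}

  MaceStatus Run(OpContext *context) override {
    // In a real op, gemv_->Compute(context, lhs, rhs, bias, ...) is called
    // here with the same argument list as before the refactor.
    MACE_UNUSED(context);
    return MaceStatus::MACE_SUCCESS;
  }

 private:
  std::unique_ptr<delegator::Gemv> gemv_;
};

}  // namespace ops
}  // namespace mace
```

Because the op only holds a `std::unique_ptr<delegator::Gemv>`, choosing between the NEON and reference implementations becomes a registration decision rather than a compile-time `#ifdef` in every op.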
-#include "mace/ops/arm/q8/eltwise.h" - #include #include #include "mace/ops/common/gemmlowp_util.h" +#include "mace/ops/delegator/eltwise.h" #include "mace/utils/logging.h" namespace mace { @@ -25,6 +24,16 @@ namespace ops { namespace arm { namespace q8 { +class Eltwise : public delegator::Eltwise { + public: + explicit Eltwise(const delegator::EltwiseParam ¶m) + : delegator::Eltwise(param) {} + ~Eltwise() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) override; +}; + MaceStatus Eltwise::Compute(const OpContext *context, const Tensor *input0, const Tensor *input1, @@ -144,7 +153,7 @@ MaceStatus Eltwise::Compute(const OpContext *context, gemmlowp::SaturatingRoundingDoublingHighMul( res, output_multiplier), -output_shift) + - output->zero_point(); + output->zero_point(); output_ptr[i] = Saturate(output_val); } }, @@ -153,6 +162,9 @@ MaceStatus Eltwise::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +MACE_REGISTER_DELEGATOR(registry, Eltwise, delegator::EltwiseParam, + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, NEON)) + } // namespace q8 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/q8/eltwise.h b/mace/ops/arm/q8/eltwise.h deleted file mode 100644 index 200b13cb2769787a92c2d03da40f1b2e10d65900..0000000000000000000000000000000000000000 --- a/mace/ops/arm/q8/eltwise.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// This implements matrix-vector multiplication described as -// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt - -#ifndef MACE_OPS_ARM_Q8_ELTWISE_H_ -#define MACE_OPS_ARM_Q8_ELTWISE_H_ - -#include "mace/core/op_context.h" -#include "mace/core/types.h" -#include "mace/ops/common/eltwise_type.h" - -namespace mace { -namespace ops { -namespace arm { -namespace q8 { - -class Eltwise { - public: - explicit Eltwise(const EltwiseType type) : type_(type) {} - - MaceStatus Compute(const OpContext *context, - const Tensor *input0, - const Tensor *input1, - Tensor *output); - - private: - EltwiseType type_; -}; - -} // namespace q8 -} // namespace arm -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_ARM_Q8_ELTWISE_H_ diff --git a/mace/ops/arm/q8/gemv.cc b/mace/ops/arm/q8/gemv.cc index 388c68147ff305cf603c95a62293024b7b1db03d..11290d357d0a33992ba52d3a5b8de31040a66738 100644 --- a/mace/ops/arm/q8/gemv.cc +++ b/mace/ops/arm/q8/gemv.cc @@ -181,6 +181,14 @@ class Gemv; template class Gemv; +typedef Gemv GemvUint8; +MACE_REGISTER_DELEGATOR(registry, GemvUint8, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, NEON)) + +typedef Gemv GemvInt32; +MACE_REGISTER_DELEGATOR(registry, GemvInt32, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, int32_t, NEON)) + } // namespace q8 } // namespace arm } // namespace ops diff --git a/mace/ops/arm/q8/gemv.h b/mace/ops/arm/q8/gemv.h index 21a275798a7dd9533c1645d606386aa89cf91a92..c9b98a07d3f50e5f5c26ff42caf791e9b6d38b67 100644 --- a/mace/ops/arm/q8/gemv.h +++ b/mace/ops/arm/q8/gemv.h @@ -1,4 +1,4 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
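Editor's note: the quantized Gemv above is registered once per output type. Spelled out with explicit template arguments (a reconstruction, assuming the q8 `Gemv` is a class template over its output type, as the `uint8_t`/`int32_t` delegator keys indicate), the registration block reads:

```c++
// Reconstruction for clarity; the typedefs give each instantiation a plain
// name that MACE_REGISTER_DELEGATOR can accept.
typedef Gemv<uint8_t> GemvUint8;
MACE_REGISTER_DELEGATOR(registry, GemvUint8, DelegatorParam,
                        MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, NEON))

typedef Gemv<int32_t> GemvInt32;
MACE_REGISTER_DELEGATOR(registry, GemvInt32, DelegatorParam,
                        MACE_DELEGATOR_KEY(Gemv, CPU, int32_t, NEON))
```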
-// This implements matrix-vector multiplication described as -// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt - #ifndef MACE_OPS_ARM_Q8_GEMV_H_ #define MACE_OPS_ARM_Q8_GEMV_H_ -#include "mace/public/mace.h" -#include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" namespace mace { namespace ops { @@ -28,11 +23,11 @@ namespace arm { namespace q8 { template -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() : is_output_type_uint8_( - DataTypeToEnum::value == DataType::DT_UINT8) { - } + explicit Gemv(const DelegatorParam ¶m) + : delegator::Gemv(param), is_output_type_uint8_( + DataTypeToEnum::value == DataType::DT_UINT8) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -45,7 +40,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; private: bool is_output_type_uint8_; diff --git a/mace/ops/arm/q8/quantize.cc b/mace/ops/arm/q8/quantize.cc index 9c80dcbc5ba3ac0f6c2770c9c5249ff8c70e73c8..4a8d402b2d859fbfb486eb7860d675a2320815ce 100644 --- a/mace/ops/arm/q8/quantize.cc +++ b/mace/ops/arm/q8/quantize.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/core/quantize.h" @@ -106,12 +107,12 @@ class DequantizeOp : public Operation { QuantizeUtil quantize_util_; }; -void RegisterQuantize(OpRegistryBase *op_registry) { +void RegisterQuantize(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Quantize", QuantizeOp, DeviceType::CPU, uint8_t); } -void RegisterDequantize(OpRegistryBase *op_registry) { +void RegisterDequantize(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp, DeviceType::CPU, uint8_t); MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp, diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index a27e46c5739428e6b08952db83f0dfce5b60e798..88c9a179fe2982b1ec38821dd850784d97953608 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -16,14 +16,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" - -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#endif +#include "mace/ops/delegator/activation.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -45,11 +41,16 @@ class BatchNormOp : public Operation { epsilon_(Operation::GetOptionalArg("epsilon", static_cast(1e-4))), activation_delegator_( - ops::StringToActivationType( - Operation::GetOptionalArg("activation", "NOOP")), - Operation::GetOptionalArg("max_limit", 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + ops::StringToActivationType( + Operation::GetOptionalArg("activation", + "NOOP")), + Operation::GetOptionalArg("max_limit", 0.0f), + Operation::GetOptionalArg("leakyrelu_coefficient", + 0.0f)))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -142,18 +143,14 @@ class BatchNormOp : public Operation { }, 0, batch, 1, 0, channels, 1); } - activation_delegator_.Compute(context, output, output); + 
activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: float epsilon_; -#ifdef MACE_ENABLE_NEON - arm::fp32::Activation activation_delegator_; -#else - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; protected: MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR); @@ -232,7 +229,7 @@ class BatchNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterBatchNorm(OpRegistryBase *op_registry) { +void RegisterBatchNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BatchNorm", BatchNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "BatchNorm", BatchNormOp); diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 937387fc6be78587c0898a5ab5d00a3640b87d3b..90324cd76f0797ae0535b99c139f48ee58077a35 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/batch_to_space.h" #endif // MACE_ENABLE_OPENCL @@ -285,7 +286,7 @@ class BatchToSpaceNDOp : public BatchToSpaceOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterBatchToSpaceND(OpRegistryBase *op_registry) { +void RegisterBatchToSpaceND(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BatchToSpaceND", BatchToSpaceNDOp, DeviceType::CPU, float); diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index c17a6e49dd5ab74302933d31fea4d07a197c4a8f..54a0f2710ad7ca8430e26d9661baf6a86b58c315 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -16,14 +16,10 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" - -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/bias_add.h" -#else -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON +#include "mace/ops/delegator/bias_add.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -42,8 +38,11 @@ class BiasAddOp : public Operation { public: explicit BiasAddOp(OpConstructContext *context) : Operation(context), - has_data_format_(Operation::GetOptionalArg("has_data_format", - 0)) {} + has_data_format_(Operation::GetOptionalArg("has_data_format", 0)), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -56,7 +55,7 @@ class BiasAddOp : public Operation { MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2, "bias must be 1-dimensional or n*c for caffee.", MakeString(bias->shape())); - bias_add_delegator_.Compute(context, input, bias, output); + bias_add_delegator_->Compute(context, input, bias, output); } else { // NHWC MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2, "bias must be 1 or 2 dimensionals for caffee.", @@ -115,11 +114,7 @@ class BiasAddOp : public Operation { private: int has_data_format_; -#ifdef MACE_ENABLE_NEON - arm::fp32::BiasAdd bias_add_delegator_; -#else - ref::BiasAdd bias_add_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr bias_add_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -164,7 +159,7 @@ class BiasAddOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterBiasAdd(OpRegistryBase *op_registry) { +void 
RegisterBiasAdd(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "BiasAdd", BiasAddOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "BiasAdd", BiasAddOp); diff --git a/mace/ops/cast.cc b/mace/ops/cast.cc index 940959a93f0333033e26a0825f28cf0f735f1bb3..dfa42a7600de0f7ebc0a4e6cc8dac7c12c783db8 100644 --- a/mace/ops/cast.cc +++ b/mace/ops/cast.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #if defined(MACE_ENABLE_NEON) && defined(__ANDROID__) #include @@ -54,7 +55,7 @@ class CastOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterCast(OpRegistryBase *op_registry) { +void RegisterCast(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Cast", CastOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Cast", CastOp, diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index a7fababb3e9a2806d4de0eb4b9d91600c4180a30..cddda38db323d70151093bcf9a84446f6f3cc5e4 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/channel_shuffle.h" #endif // MACE_ENABLE_OPENCL @@ -98,7 +99,7 @@ class ChannelShuffleOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterChannelShuffle(OpRegistryBase *op_registry) { +void RegisterChannelShuffle(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ChannelShuffle", ChannelShuffleOp, DeviceType::CPU, float); diff --git a/mace/ops/common/lstm.h b/mace/ops/common/lstm.h index d9e4024894dba1a7c3995e8239ef0a9e814a50e9..a22094e59abcc3b4e7331e7103ad12a49229786d 100644 --- a/mace/ops/common/lstm.h +++ b/mace/ops/common/lstm.h @@ -15,8 +15,8 @@ #ifndef MACE_OPS_COMMON_LSTM_H_ #define MACE_OPS_COMMON_LSTM_H_ +#include "mace/core/ops/op_context.h" #include "mace/core/types.h" -#include "mace/core/op_context.h" namespace mace { namespace ops { diff --git a/mace/ops/common/transpose.h b/mace/ops/common/transpose.h index 6a70133c2a7513be3ee9efa52784ae00b4f09457..8ff72df6cdd99d4969622f952ccd452f0fa89fa1 100644 --- a/mace/ops/common/transpose.h +++ b/mace/ops/common/transpose.h @@ -20,7 +20,7 @@ #endif // MACE_ENABLE_NEON #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/public/mace.h" namespace mace { diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index aff95a2e552ed348faa59405713e2adae84ac8ea..65f05fdc63418d6a3e31cecd9700f6dd2055a02e 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/quantize.h" #include "mace/utils/memory.h" @@ -221,7 +222,7 @@ class ConcatOp : public ConcatOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterConcat(OpRegistryBase *op_registry) { +void RegisterConcat(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, DeviceType::CPU, float); diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index c2666d073c370240e3945f166b4ce18a9d9dc0ff..83da3f85c7185f2004248e5cd2ce3697c1ce58b1 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -24,32 +24,18 @@ #include #include "mace/core/future.h" -#include 
"mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/conv_2d.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/conv_2d.h" -#include "mace/ops/arm/fp32/conv_2d_1x1.h" -#include "mace/ops/arm/fp32/conv_2d_3x3.h" -#include "mace/ops/arm/fp32/conv_2d_3x3_winograd.h" -#include "mace/ops/arm/fp32/conv_2d_5x5.h" -#include "mace/ops/arm/fp32/conv_2d_7x7.h" -#include "mace/ops/arm/fp32/conv_2d_1xn.h" -#include "mace/ops/arm/fp32/conv_general.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON - -#include "mace/ops/ref/conv_2d.h" - #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/common/gemmlowp_util.h" #include "mace/ops/arm/q8/quantization_util.h" @@ -72,13 +58,21 @@ class Conv2dOp : public ConvPool2dOpBase { public: explicit Conv2dOp(OpConstructContext *context) : ConvPool2dOpBase(context), - activation_delegator_(ops::StringToActivationType( - Operation::GetOptionalArg("activation", - "NOOP")), - Operation::GetOptionalArg("max_limit", - 0.0f), - Operation::GetOptionalArg( - "leakyrelu_coefficient", 0.0f)) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam( + ops::StringToActivationType( + Operation::GetOptionalArg("activation", + "NOOP")), + Operation::GetOptionalArg("max_limit", 0.0f), + Operation::GetOptionalArg("leakyrelu_coefficient", + 0.0f)))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(INPUT); @@ -86,116 +80,100 @@ class Conv2dOp : public ConvPool2dOpBase { const Tensor *bias = this->InputSize() >= 3 ? this->Input(BIAS) : nullptr; Tensor *output = this->Output(OUTPUT); -#ifdef MACE_ENABLE_NEON - // the following params are used to decide which conv delegator to use - const index_t stride_h = strides_[0]; - const index_t stride_w = strides_[1]; - const index_t dilation_h = dilations_[0]; - const index_t dilation_w = dilations_[1]; - const index_t filter_h = filter->dim(2); - const index_t filter_w = filter->dim(3); - const index_t input_channels = input->dim(1); - const index_t channels = filter->dim(0); - - // NOTE: delegator is fixed after first round of running, - // although winograd depends on input params. - // We do not support changeable filter for now. 
if (conv2d_delegator_ == nullptr) { - if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 - && dilation_h == 1 && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 3 && filter_w == 3 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - if (input_channels >= 8 && channels >= 8) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); + std::string tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + // the following params are used to decide which conv delegator to use + const index_t stride_h = strides_[0]; + const index_t stride_w = strides_[1]; + const index_t dilation_h = dilations_[0]; + const index_t dilation_w = dilations_[1]; + const index_t filter_h = filter->dim(2); + const index_t filter_w = filter->dim(3); + const index_t input_channels = input->dim(1); + const index_t channels = filter->dim(0); + // NOTE: delegator is fixed after first round of running, + // although winograd depends on input params. + // We do not support changeable filter for now. + if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x1); + } else if (filter_h == 3 && filter_w == 3 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + if (input_channels >= 8 && channels >= 8) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3Winograd); + } else { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } + } else if (filter_h == 3 && filter_w == 3 + && stride_h == 2 && stride_w == 2 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (filter_h == 5 && filter_w == 5 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K5x5S1); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S1); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 2 && stride_w == 2 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S2); + } else if (filter_h == 7 && filter_w == 7 + && stride_h == 3 && stride_w == 3 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x7S3); + } else if (filter_h == 1 && filter_w == 7 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x7S1); + } else if (filter_h == 7 && filter_w == 1 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K7x1S1); + } else if (filter_h == 1 && filter_w == 15 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K1x15S1); + } else if (filter_h == 15 && filter_w == 1 + && stride_h == 1 && stride_w == 1 && dilation_h == 1 + && dilation_w == 1) { + tag = 
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K15x1S1); } - } else if (filter_h == 3 && filter_w == 3 - && stride_h == 2 && stride_w == 2 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 5 && filter_w == 5 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 2 && stride_w == 2 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 7 - && stride_h == 3 && stride_w == 3 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 1 && filter_w == 7 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 7 && filter_w == 1 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 1 && filter_w == 15 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else if (filter_h == 15 && filter_w == 1 - && stride_h == 1 && stride_w == 1 && dilation_h == 1 - && dilation_w == 1) { - conv2d_delegator_ = make_unique( - paddings_, padding_type_); - } else { - conv2d_delegator_ = make_unique( - strides_, - dilations_, - paddings_, - padding_type_); } + delegator::Conv2dParam param(strides_, dilations_, + paddings_, padding_type_); + conv2d_delegator_ = delegator::Conv2d::Create(context->workspace(), + tag, param); } conv2d_delegator_->Compute(context, input, filter, output); -#else - if (ref_conv2d_delegator_ == nullptr) { - ref_conv2d_delegator_ = make_unique>(strides_, - dilations_, - paddings_, - padding_type_); - } - ref_conv2d_delegator_->Compute(context, input, filter, output); -#endif - - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr conv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - std::unique_ptr> ref_conv2d_delegator_; - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr conv2d_delegator_; private: MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); @@ -518,7 +496,7 @@ class Conv2dOp : public ConvPool2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterConv2D(OpRegistryBase *op_registry) { +void RegisterConv2D(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index b5ad48aea307a138fbbea234b6f44465055817c4..8d96532d169bdafed2e15d2651ae1dc17a9819b2 100644 --- a/mace/ops/conv_pool_2d_base.h +++ 
b/mace/ops/conv_pool_2d_base.h @@ -17,7 +17,7 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/ops/common/conv_pool_2d_util.h" namespace mace { diff --git a/mace/ops/crop.cc b/mace/ops/crop.cc index acaa73f1cfe82834af09d098a7cfc2b12fe70880..5be823453ebd852ae24edbcdd1a33fa2893af03e 100644 --- a/mace/ops/crop.cc +++ b/mace/ops/crop.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #include "mace/utils/memory.h" #ifdef MACE_ENABLE_OPENCL @@ -132,7 +133,7 @@ class CropOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterCrop(OpRegistryBase *op_registry) { +void RegisterCrop(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Crop", CropOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Crop", CropOp); diff --git a/mace/ops/cumsum.cc b/mace/ops/cumsum.cc index 302fdfd585f4a16a7da42ebe1fd495c4f0ce9b6e..b1cb58f0b268da6df2b98397a3a4d005d7706f01 100644 --- a/mace/ops/cumsum.cc +++ b/mace/ops/cumsum.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -141,7 +142,7 @@ class CumsumOp : public Operation { bool checked_; }; -void RegisterCumsum(OpRegistryBase *op_registry) { +void RegisterCumsum(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Cumsum", CumsumOp, DeviceType::CPU, float); } diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 98298e0c9b709e51c9c8bda1a260bdd6dc8ed6e5..1e68449bdf1b36d9cbf7566a19f03a3194821069 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -14,20 +14,6 @@ #include "mace/ops/deconv_2d.h" -#if defined(MACE_ENABLE_NEON) -#include -#include "mace/ops/arm/fp32/deconv_2d_2x2.h" -#include "mace/ops/arm/fp32/deconv_2d_3x3.h" -#include "mace/ops/arm/fp32/deconv_2d_4x4.h" -#include "mace/ops/arm/fp32/deconv_2d_general.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/bias_add.h" -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/deconv_2d.h" -#endif - #include #include #include @@ -35,9 +21,13 @@ #include #include "mace/core/future.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/deconv_2d.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -49,6 +39,10 @@ namespace mace { namespace ops { +namespace { +const std::vector kDeconv2dStrides = {1, 1}; +} + template class Deconv2dOp; @@ -57,9 +51,16 @@ class Deconv2dOp : public Deconv2dOpBase { public: explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(0); @@ -79,91 +80,67 @@ 
class Deconv2dOp : public Deconv2dOpBase { MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); -#ifdef MACE_ENABLE_NEON - const index_t kernel_h = filter->dim(2); - const index_t kernel_w = filter->dim(3); - bool use_neon_2x2_s1 = kernel_h == kernel_w && kernel_h == 2 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 && - strides_[0] == strides_[1] && strides_[0] == 2; + if (deconv2d_delegator_ == nullptr) { + std::string tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t kernel_h = filter->dim(2); + const index_t kernel_w = filter->dim(3); - bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_2x2_s1 = kernel_h == kernel_w && kernel_h == 2 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 && + strides_[0] == strides_[1] && strides_[0] == 2; - bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 2; - if (deconv2d_delegator_ == nullptr) { - if (use_neon_2x2_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_2x2_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, model_type_); - } else { - deconv2d_delegator_ = - make_unique(strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - model_type_); + bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 2; + + if (use_neon_2x2_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K2x2S1); + } else if (use_neon_2x2_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K2x2S2); + } else if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } } + delegator::Deconv2dParam param(strides_, kDeconv2dStrides, paddings_, + padding_type_, model_type_); + deconv2d_delegator_ = delegator::Deconv2d::Create(context->workspace(), + 
tag, param); } - deconv2d_delegator_->Compute(context, - input, - filter, - output_shape_tensor, - output); -#else - if (deconv2d_delegator_ == nullptr) { - deconv2d_delegator_ = make_unique>(strides_, - std::vector{ - 1, 1}, - paddings_, - padding_type_, - model_type_); - } - deconv2d_delegator_->Compute(context, - input, - filter, - output_shape_tensor, - output); - -#endif // MACE_ENABLE_NEON - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + deconv2d_delegator_->Compute(context, input, filter, + output_shape_tensor, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr deconv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; - std::unique_ptr> deconv2d_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr deconv2d_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -258,7 +235,7 @@ class Deconv2dOp : public Deconv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterDeconv2D(OpRegistryBase *op_registry) { +void RegisterDeconv2D(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Deconv2D", Deconv2dOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "Deconv2D", Deconv2dOp); diff --git a/mace/ops/deconv_2d.h b/mace/ops/deconv_2d.h index 50a2ecee5e8329ea24aa3fbae419823831d1b370..a11d5f8a8bd77a7be78605a6a256331d2ceccdd7 100644 --- a/mace/ops/deconv_2d.h +++ b/mace/ops/deconv_2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/types.h" #include "mace/ops/activation.h" #include "mace/ops/common/conv_pool_2d_util.h" diff --git a/mace/ops/delegator/activation.h b/mace/ops/delegator/activation.h new file mode 100644 index 0000000000000000000000000000000000000000..80a9c6b376fceda5d84d2de4eb7358213df9613b --- /dev/null +++ b/mace/ops/delegator/activation.h @@ -0,0 +1,61 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_OPS_DELEGATOR_ACTIVATION_H_ +#define MACE_OPS_DELEGATOR_ACTIVATION_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/activation_type.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct ActivationParam : public DelegatorParam { + explicit ActivationParam(ActivationType type, const float limit, + const float leakyrelu_coefficient) + : type_(type), limit_(limit), + leakyrelu_coefficient_(leakyrelu_coefficient) {} + + ActivationType type_; + const float limit_; + const float leakyrelu_coefficient_; +}; + +class Activation : public OpDelegator { + public: + explicit Activation(const ActivationParam ¶m) + : OpDelegator(param), type_(param.type_), limit_(param.limit_), + leakyrelu_coefficient_(param.leakyrelu_coefficient_) {} + virtual ~Activation() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Activation) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + Tensor *output) = 0; + + protected: + ActivationType type_; + const float limit_; + const float leakyrelu_coefficient_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_ACTIVATION_H_ diff --git a/mace/ops/delegator/bias_add.h b/mace/ops/delegator/bias_add.h new file mode 100644 index 0000000000000000000000000000000000000000..f5fdea0deea984cf2450d2f17cd29c6913a35bd9 --- /dev/null +++ b/mace/ops/delegator/bias_add.h @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_OPS_DELEGATOR_BIAS_ADD_H_ +#define MACE_OPS_DELEGATOR_BIAS_ADD_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { +namespace delegator { + +class BiasAdd : public OpDelegator { + public: + explicit BiasAdd(const DelegatorParam ¶m) : OpDelegator(param) {} + virtual ~BiasAdd() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(BiasAdd) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *bias, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_BIAS_ADD_H_ diff --git a/mace/ops/delegator/conv_2d.h b/mace/ops/delegator/conv_2d.h new file mode 100644 index 0000000000000000000000000000000000000000..9ff85f6dacd1123cfbd02a12f90990c6750d5c37 --- /dev/null +++ b/mace/ops/delegator/conv_2d.h @@ -0,0 +1,90 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MACE_OPS_DELEGATOR_CONV_2D_H_ +#define MACE_OPS_DELEGATOR_CONV_2D_H_ + +#include + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/conv_pool_2d_util.h" + +namespace mace { +namespace ops { + +enum ConvType { + General, + K1x1, + K1x7S1, + K7x1S1, + K1x15S1, + K15x1S1, + K3x3S1, + K3x3S2, + K3x3Winograd, + K5x5S1, + K7x7S1, + K7x7S2, + K7x7S3, +}; + +namespace delegator { + +struct Conv2dParam : public DelegatorParam { + explicit Conv2dParam(const std::vector &strides, + const std::vector &dilations, + const std::vector &paddings, + const Padding padding_type) + : strides_(strides), dilations_(dilations), + paddings_(paddings), padding_type_(padding_type) {} + + const std::vector &strides_; + const std::vector &dilations_; + const std::vector &paddings_; + const Padding padding_type_; +}; + +class Conv2d : public OpDelegator { + public: + explicit Conv2d(const delegator::Conv2dParam ¶m) + : OpDelegator(param), + strides_(param.strides_), + dilations_(param.dilations_), + paddings_(param.paddings_), + padding_type_(param.padding_type_) {} + virtual ~Conv2d() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Conv2d) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *filter, + Tensor *output) = 0; + + protected: + const std::vector strides_; + const std::vector dilations_; + const std::vector paddings_; + const Padding padding_type_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_CONV_2D_H_ + diff --git a/mace/ops/delegator/deconv_2d.h b/mace/ops/delegator/deconv_2d.h new file mode 100644 index 0000000000000000000000000000000000000000..856f3595bcd37b86dc3c65d2c48a70a4901f3b47 --- /dev/null +++ b/mace/ops/delegator/deconv_2d.h @@ -0,0 +1,95 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
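`activation.h`, `bias_add.h` and `conv_2d.h` above fix the shape every delegator interface takes: a `DelegatorParam` subclass when construction-time settings are needed, an abstract class derived from `OpDelegator`, a `MACE_DEFINE_DELEGATOR_CREATOR(...)` line that generates the static `Create(...)` factory the ops call, and a pure-virtual `Compute(...)`. As a minimal sketch of that boilerplate, a hypothetical `Softmax` delegator header (not part of this patch, shown only to illustrate the pattern) might look like:

```c++
// mace/ops/delegator/softmax.h -- hypothetical, for illustration only.
#ifndef MACE_OPS_DELEGATOR_SOFTMAX_H_
#define MACE_OPS_DELEGATOR_SOFTMAX_H_

#include "mace/core/ops/op_context.h"
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"

namespace mace {
namespace ops {
namespace delegator {

class Softmax : public OpDelegator {
 public:
  // No construction-time settings, so the plain DelegatorParam is enough.
  explicit Softmax(const DelegatorParam &param) : OpDelegator(param) {}
  virtual ~Softmax() = default;

  // Generates the static Create() factory used by the ops in this patch.
  MACE_DEFINE_DELEGATOR_CREATOR(Softmax)

  virtual MaceStatus Compute(const OpContext *context,
                             const Tensor *input,
                             Tensor *output) = 0;
};

}  // namespace delegator
}  // namespace ops
}  // namespace mace

#endif  // MACE_OPS_DELEGATOR_SOFTMAX_H_
```

Concrete portable and NEON implementations then derive from this class and are registered under keys built with `MACE_DELEGATOR_KEY` / `MACE_DELEGATOR_KEY_EX`, so ops never name them directly.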
+ + +#ifndef MACE_OPS_DELEGATOR_DECONV_2D_H_ +#define MACE_OPS_DELEGATOR_DECONV_2D_H_ + +#include + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { + +enum DeconvType { + General, + K2x2S1, + K2x2S2, + K3x3S1, + K3x3S2, + K4x4S1, + K4x4S2, +}; + +namespace delegator { + +struct Deconv2dParam : public DelegatorParam { + explicit Deconv2dParam(const std::vector &strides, + const std::vector &dilations, + const std::vector &paddings, + const Padding padding_type, + const FrameworkType framework_type, + const int group = 1) + : strides_(strides), dilations_(dilations), + paddings_(paddings), padding_type_(padding_type), + framework_type_(framework_type), + group_(group) {} + + const std::vector &strides_; + const std::vector &dilations_; + const std::vector &paddings_; + const Padding padding_type_; + const FrameworkType framework_type_; + const int group_; +}; + +class Deconv2d : public OpDelegator { + public: + explicit Deconv2d(const Deconv2dParam ¶m) + : OpDelegator(param), + strides_(param.strides_), + dilations_(param.dilations_), + paddings_(param.paddings_), + padding_type_(param.padding_type_), + framework_type_(param.framework_type_), + group_(param.group_) {} + + virtual ~Deconv2d() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Deconv2d) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *input, + const Tensor *filter, + const Tensor *output_shape, + Tensor *output) = 0; + + protected: + const std::vector strides_; + const std::vector dilations_; + const std::vector paddings_; + const Padding padding_type_; + const FrameworkType framework_type_; + const int group_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_DECONV_2D_H_ + diff --git a/mace/ops/ref/bias_add.h b/mace/ops/delegator/depthwise_conv_2d.h similarity index 52% rename from mace/ops/ref/bias_add.h rename to mace/ops/delegator/depthwise_conv_2d.h index f3dc6096e0ae409d0a4b226ebd21b04d6e0228b5..c586839bbcdb3a3d42a2a200fffaaf2e40a9432d 100644 --- a/mace/ops/ref/bias_add.h +++ b/mace/ops/delegator/depthwise_conv_2d.h @@ -1,4 +1,4 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,35 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. 
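One detail of `Conv2dParam` (and of the other param structs above) worth noting: its members are references, while the corresponding members of the abstract delegator are values, so the delegator base class takes its own copies at construction and the param can be a short-lived temporary. A sketch of the resulting one-expression creation, assuming the usual `strides_`/`dilations_`/`paddings_`/`padding_type_` op members, a `conv2d_delegator_` member of type `std::unique_ptr<delegator::Conv2d>`, and a portable kernel registered under the same `General` tag convention used elsewhere in this patch:

```c++
// Inside a CPU float op's Run(OpContext *context).
if (conv2d_delegator_ == nullptr) {
  const std::string tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
                                                MACE_CPU_IMPL_TYPE, General);
  // Conv2dParam only borrows the vectors; the Conv2d base copies them in its
  // constructor, so passing a temporary param to Create() is enough here.
  conv2d_delegator_ = delegator::Conv2d::Create(
      context->workspace(), tag,
      delegator::Conv2dParam(strides_, dilations_, paddings_, padding_type_));
}
conv2d_delegator_->Compute(context, input, filter, output);
```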
-#ifndef MACE_OPS_REF_BIAS_ADD_H_ -#define MACE_OPS_REF_BIAS_ADD_H_ -#include "mace/core/op_context.h" +#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ +#define MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ + +#include "mace/ops/delegator/conv_2d.h" namespace mace { namespace ops { -namespace ref { - -class BiasAdd { - public: - BiasAdd() = default; - ~BiasAdd() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); - - private: - void AddBias(const OpContext *context, - const Tensor *input, - const Tensor *bias, - Tensor *output); -}; - -} // namespace ref +namespace delegator { + +typedef Conv2dParam DepthwiseConv2dParam; +typedef Conv2d DepthwiseConv2d; + +} // namespace delegator } // namespace ops } // namespace mace -#endif // MACE_OPS_REF_BIAS_ADD_H_ +#endif // MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_ + diff --git a/mace/ops/delegator/depthwise_deconv_2d.h b/mace/ops/delegator/depthwise_deconv_2d.h new file mode 100644 index 0000000000000000000000000000000000000000..10616f1b10f7470fefecf58a23713aaf9c168709 --- /dev/null +++ b/mace/ops/delegator/depthwise_deconv_2d.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ +#define MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ + +#include "mace/ops/delegator/deconv_2d.h" +namespace mace { +namespace ops { +namespace delegator { + +typedef Deconv2dParam DepthwiseDeconv2dParam; +typedef Deconv2dParam GroupDeconv2dParam; + +typedef Deconv2d DepthwiseDeconv2d; +typedef Deconv2d GroupDeconv2d; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_ + diff --git a/mace/ops/delegator/eltwise.h b/mace/ops/delegator/eltwise.h new file mode 100644 index 0000000000000000000000000000000000000000..fe66f35462270535bfda14fb50b907e3309dee6b --- /dev/null +++ b/mace/ops/delegator/eltwise.h @@ -0,0 +1,57 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
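`depthwise_conv_2d.h` and `depthwise_deconv_2d.h` above add no new interfaces; they only alias `Conv2d`/`Deconv2d`, so depthwise and grouped kernels are selected purely by key. Together with the `framework_type` and `group` fields of `Deconv2dParam`, a Caffe-style grouped deconvolution is wired roughly as follows (a sketch modeled on the `depthwise_deconv2d.cc` hunk later in this patch; `group_deconv2d_delegator_` and `unit_dilations` are illustrative names, and the `{1, 1}` dilation vector mirrors the constant those ops use):

```c++
// Inside a CPU float op's Run(OpContext *context); group_ > 1, Caffe model,
// group_deconv2d_delegator_ is a std::unique_ptr<delegator::GroupDeconv2d>.
if (group_deconv2d_delegator_ == nullptr) {
  const std::string tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float,
                                                MACE_CPU_IMPL_TYPE, General);
  const std::vector<int> unit_dilations{1, 1};  // fixed, as in this patch
  delegator::GroupDeconv2dParam param(strides_, unit_dilations, paddings_,
                                      padding_type_, CAFFE, group_);
  group_deconv2d_delegator_ = delegator::GroupDeconv2d::Create(
      context->workspace(), tag, param);
}
// The depthwise/group op in this patch passes no output-shape tensor here.
group_deconv2d_delegator_->Compute(context, input, filter,
                                   /*output_shape=*/nullptr, output);
```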
+ +// This implements matrix-vector multiplication described as +// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt + +#ifndef MACE_OPS_DELEGATOR_ELTWISE_H_ +#define MACE_OPS_DELEGATOR_ELTWISE_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/core/types.h" +#include "mace/ops/common/eltwise_type.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct EltwiseParam : public DelegatorParam { + explicit EltwiseParam(EltwiseType type) + : type_(type) {} + + EltwiseType type_; +}; + +class Eltwise : public OpDelegator { + public: + explicit Eltwise(const EltwiseParam ¶m) : OpDelegator(param), + type_(param.type_) {} + virtual ~Eltwise() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Eltwise) + + virtual MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) = 0; + + protected: + EltwiseType type_; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_ELTWISE_H_ diff --git a/mace/ops/delegator/gemm.h b/mace/ops/delegator/gemm.h new file mode 100644 index 0000000000000000000000000000000000000000..29043c3b27260740e0c924a2ec4dbd6fda52b666 --- /dev/null +++ b/mace/ops/delegator/gemm.h @@ -0,0 +1,77 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
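`eltwise.h` above is the interface the quantized `EltwiseOp` switches to later in this patch: the element-wise operation is fixed at construction through `EltwiseParam`, while the per-tensor quantization parameters are presumably read from the tensors inside `Compute`, since they appear nowhere in the param. A sketch of obtaining and using it, assuming a `uint8_t` implementation is registered for the build's `MACE_CPU_IMPL_TYPE` and using `SUM` as the example operation (the real op reads the type from its `type` argument):

```c++
#include "mace/ops/delegator/eltwise.h"

// In the op constructor (OpConstructContext *context), mirroring eltwise.cc:
eltwise_delegator_ = delegator::Eltwise::Create(
    context->workspace(),
    MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, MACE_CPU_IMPL_TYPE),
    delegator::EltwiseParam(ops::EltwiseType::SUM));

// In Run(OpContext *context): the whole quantized element-wise computation
// becomes a single delegated call.
return eltwise_delegator_->Compute(context, input0, input1, output);
```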
+ + +#ifndef MACE_OPS_DELEGATOR_GEMM_H_ +#define MACE_OPS_DELEGATOR_GEMM_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/common/matrix.h" + +namespace mace { +namespace ops { +namespace delegator { + +struct GemmParam : public DelegatorParam { + explicit GemmParam(const bool should_cache_pack = false) + : should_cache_pack_(should_cache_pack) {} + + const bool should_cache_pack_; +}; + +class Gemm : public OpDelegator { + public: + explicit Gemm(const GemmParam ¶m) : OpDelegator(param) {} + virtual ~Gemm() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Gemm) + + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const index_t batch, + const index_t rows, + const index_t cols, + const index_t depth, + const MatrixMajor lhs_major, + const MatrixMajor rhs_major, + const MatrixMajor output_major, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; + // Original matrix before transpose has row-major + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const index_t batch, + const index_t lhs_rows, + const index_t lhs_cols, + const index_t rhs_rows, + const index_t rhs_cols, + const bool transpose_lhs, + const bool transpose_rhs, + const bool transpose_out, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_GEMM_H_ + diff --git a/mace/ops/delegator/gemv.h b/mace/ops/delegator/gemv.h new file mode 100644 index 0000000000000000000000000000000000000000..4bdde1820463b140a5a8ba003f91529fd12956bb --- /dev/null +++ b/mace/ops/delegator/gemv.h @@ -0,0 +1,52 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
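`gemm.h` above exposes two pure-virtual `Compute` overloads: one that describes operands with explicit `MatrixMajor` layouts and one that takes the original row-major shapes plus transpose flags, which is the form `matmul.cc` uses later in this patch. The only construction-time knob is `should_cache_pack`, which by its name allows an implementation to cache packed operands across runs. A sketch of creating the delegator and issuing a batched `C = A * B` through the transpose-flag overload (the shapes `M`, `K`, `N` and the tensor names are placeholders; setting `should_cache_pack` to `true` is an illustrative choice):

```c++
#include "mace/ops/delegator/gemm.h"

// In the op constructor:
gemm_ = delegator::Gemm::Create(
    context->workspace(),
    MACE_DELEGATOR_KEY(Gemm, CPU, float, MACE_CPU_IMPL_TYPE),
    delegator::GemmParam(/*should_cache_pack=*/true));

// In Run(OpContext *context): batch of row-major [M, K] x [K, N] products.
MaceStatus status = gemm_->Compute(context, lhs, rhs, batch,
                                   /*lhs_rows=*/M, /*lhs_cols=*/K,
                                   /*rhs_rows=*/K, /*rhs_cols=*/N,
                                   /*transpose_lhs=*/false,
                                   /*transpose_rhs=*/false,
                                   /*transpose_out=*/false,
                                   /*lhs_batched=*/true,
                                   /*rhs_batched=*/true,
                                   output);
```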
+ + +#ifndef MACE_OPS_DELEGATOR_GEMV_H_ +#define MACE_OPS_DELEGATOR_GEMV_H_ + +#include "mace/core/ops/op_context.h" +#include "mace/core/ops/op_delegator.h" +#include "mace/core/registry/op_delegator_registry.h" + +namespace mace { +namespace ops { +namespace delegator { + +class Gemv : public OpDelegator { + public: + explicit Gemv(const DelegatorParam ¶m) : OpDelegator(param) {} + virtual ~Gemv() = default; + + MACE_DEFINE_DELEGATOR_CREATOR(Gemv) + + // Always row-major after transpose + virtual MaceStatus Compute(const OpContext *context, + const Tensor *lhs, + const Tensor *rhs, + const Tensor *bias, + const index_t batch, + const index_t lhs_height, + const index_t lhs_width, + const bool lhs_batched, + const bool rhs_batched, + Tensor *output) = 0; +}; + +} // namespace delegator +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_DELEGATOR_GEMV_H_ + diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index 6efa4d24566972164fd39d848d037f8c850e12e2..9484fdde2964952389e3402d2ffb7323076a153e 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/depth_to_space.h" #endif // MACE_ENABLE_OPENCL @@ -184,7 +185,7 @@ class DepthToSpaceOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterDepthToSpace(OpRegistryBase *op_registry) { +void RegisterDepthToSpace(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DepthToSpace", DepthToSpaceOp, DeviceType::CPU, float); diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 06964ee038088d6921b5d9244eac3c14913522ae..23cf8e046fa82edbab28cbddfb57a99d721c61ac 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -17,17 +17,6 @@ #include #include -#include "mace/ops/ref/depthwise_conv_2d.h" - -#if defined(MACE_ENABLE_NEON) -#include "mace/ops/arm/fp32/depthwise_conv_2d_3x3.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" -#else -#include "mace/ops/ref/activation.h" -#include "mace/ops/ref/bias_add.h" -#endif // MACE_ENABLE_NEON - #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/arm/q8/quantization_util.h" // We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it @@ -36,9 +25,13 @@ #endif // MACE_ENABLE_QUANTIZE #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/activation.h" #include "mace/ops/conv_pool_2d_base.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" #include "mace/public/mace.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" @@ -75,9 +68,16 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { public: explicit DepthwiseConv2dOp(OpConstructContext *context) : DepthwiseConv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + 
DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -92,67 +92,44 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); -#ifdef MACE_ENABLE_NEON - const index_t filter_h = filter->dim(2); - const index_t filter_w = filter->dim(3); - const index_t stride_h = strides_[0]; - const index_t stride_w = strides_[1]; - const index_t dilation_h = dilations_[0]; - const index_t dilation_w = dilations_[1]; - - if (filter_h == 3 && filter_w == 3 && stride_h == 1 && stride_w == 1 - && dilation_h == 1 && dilation_w == 1) { - if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_ = - make_unique(paddings_, - padding_type_); - } - conv2d_delegator_->Compute(context, input, filter, output); - } else if (filter_h == 3 && filter_w == 3 && stride_h == 2 && stride_w == 2 - && dilation_h == 1 && dilation_w == 1) { - if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_ = - make_unique(paddings_, - padding_type_); - } - conv2d_delegator_->Compute(context, input, filter, output); - } else { - if (ref_conv2d_delegator_.get() == nullptr) { - ref_conv2d_delegator_ = - make_unique>(strides_, - dilations_, - paddings_, - padding_type_); + if (depthwise_conv2d_delegator_ == nullptr) { + std::string tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + REF, General); + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t filter_h = filter->dim(2); + const index_t filter_w = filter->dim(3); + const index_t stride_h = strides_[0]; + const index_t stride_w = strides_[1]; + const index_t dilation_h = dilations_[0]; + const index_t dilation_w = dilations_[1]; + if (filter_h == 3 && filter_w == 3 && stride_h == 1 && stride_w == 1 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (filter_h == 3 && filter_w == 3 && stride_h == 2 + && stride_w == 2 + && dilation_h == 1 && dilation_w == 1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } } - ref_conv2d_delegator_->Compute(context, input, filter, output); - } -#else - if (ref_conv2d_delegator_.get() == nullptr) { - ref_conv2d_delegator_ = - make_unique>(strides_, - dilations_, - paddings_, - padding_type_); + delegator::Conv2dParam param(strides_, dilations_, + paddings_, padding_type_); + depthwise_conv2d_delegator_ = delegator::DepthwiseConv2d::Create( + context->workspace(), tag, param); } - ref_conv2d_delegator_->Compute(context, input, filter, output); -#endif // MACE_ENABLE_NEON - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + depthwise_conv2d_delegator_->Compute(context, input, filter, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr conv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON - std::unique_ptr> ref_conv2d_delegator_; + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr depthwise_conv2d_delegator_; protected: MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS); @@ -422,7 +399,7 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void 
RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { +void RegisterDepthwiseConv2d(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DepthwiseConv2d", DepthwiseConv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc index 96f6d575fd2c8663d7c2c860dbbdbd7d0801713d..f09261d6541b4b771baa1a2fe1ac85fad49e5b7d 100644 --- a/mace/ops/depthwise_deconv2d.cc +++ b/mace/ops/depthwise_deconv2d.cc @@ -12,33 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/deconv_2d.h" - -#if defined(MACE_ENABLE_NEON) -#include -#include "mace/ops/arm/fp32/depthwise_deconv_2d_general.h" -#include "mace/ops/arm/fp32/depthwise_deconv_2d_3x3.h" -#include "mace/ops/arm/fp32/depthwise_deconv_2d_4x4.h" -#include "mace/ops/arm/fp32/bias_add.h" -#include "mace/ops/arm/fp32/activation.h" - -#else -#include "mace/ops/ref/depthwise_deconv_2d.h" -#include "mace/ops/ref/bias_add.h" -#include "mace/ops/ref/activation.h" -#endif - #include #include #include #include #include "mace/core/future.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" -#include "mace/utils/math.h" +#include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/deconv_2d.h" +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/bias_add.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" #include "mace/public/mace.h" +#include "mace/utils/math.h" #include "mace/utils/memory.h" -#include "mace/ops/common/conv_pool_2d_util.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -48,6 +37,10 @@ namespace mace { namespace ops { +namespace { +const std::vector kDepthwiseStrides = {1, 1}; +} + template class DepthwiseDeconv2dOp; @@ -57,9 +50,16 @@ class DepthwiseDeconv2dOp public: explicit DepthwiseDeconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_( + delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, relux_max_limit_, + leakyrelu_coefficient_))), + bias_add_delegator_(delegator::BiasAdd::Create( + context->workspace(), + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(0); @@ -74,113 +74,77 @@ class DepthwiseDeconv2dOp const index_t in_channels = input->dim(1); bool is_depthwise = group_ == in_channels; -#ifdef MACE_ENABLE_NEON - const index_t kernel_h = filter->dim(2); - const index_t kernel_w = filter->dim(3); - bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && - strides_[0] == strides_[1] && strides_[0] == 2; - bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 1; - bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && - strides_[0] == strides_[1] && strides_[0] == 2; - - if (deconv2d_delegator_ == nullptr) { - if (is_depthwise) { - if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = 
make_unique( - paddings_, padding_type_, CAFFE); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, CAFFE); - } else { - deconv2d_delegator_ = - make_unique( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - CAFFE); - } - } else { - if (use_neon_3x3_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_3x3_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_4x4_s1) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); - } else if (use_neon_4x4_s2) { - deconv2d_delegator_ = make_unique( - paddings_, padding_type_, group_, CAFFE); + if (depthwise_deconv2d_delegator_ == nullptr) { + if (MACE_CPU_IMPL_TYPE == NEON) { + const index_t kernel_h = filter->dim(2); + const index_t kernel_w = filter->dim(3); + bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 && + strides_[0] == strides_[1] && strides_[0] == 2; + bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 1; + bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 && + strides_[0] == strides_[1] && strides_[0] == 2; + + if (is_depthwise) { + std::string tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } + delegator::DepthwiseDeconv2dParam param(strides_, kDepthwiseStrides, + paddings_, padding_type_, + CAFFE, group_); + depthwise_deconv2d_delegator_ = delegator::DepthwiseDeconv2d::Create( + context->workspace(), tag, param); } else { - deconv2d_delegator_ = make_unique( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - group_, - CAFFE); + std::string tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, General); + if (use_neon_3x3_s1) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S1); + } else if (use_neon_3x3_s2) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K3x3S2); + } else if (use_neon_4x4_s1) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S1); + } else if (use_neon_4x4_s2) { + tag = MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, + MACE_CPU_IMPL_TYPE, K4x4S2); + } + delegator::GroupDeconv2dParam param(strides_, kDepthwiseStrides, + paddings_, padding_type_, + CAFFE, group_); + depthwise_deconv2d_delegator_ = delegator::GroupDeconv2d::Create( + context->workspace(), tag, param); } } } - deconv2d_delegator_->Compute(context, - input, - filter, - nullptr, - output); -#else - if (deconv2d_delegator_ == nullptr) { - if (is_depthwise) { - deconv2d_delegator_ = make_unique>( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - CAFFE); - } else { - deconv2d_delegator_ = make_unique>( - strides_, - std::vector{1, 1}, - paddings_, - padding_type_, - group_, - CAFFE); - } - } - 
deconv2d_delegator_->Compute(context, - input, - filter, - nullptr, - output); -#endif - - bias_add_delegator_.Compute(context, output, bias, output); - activation_delegator_.Compute(context, output, output); + depthwise_deconv2d_delegator_->Compute(context, input, filter, + nullptr, output); + bias_add_delegator_->Compute(context, output, bias, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - std::unique_ptr deconv2d_delegator_; - arm::fp32::BiasAdd bias_add_delegator_; - arm::fp32::Activation activation_delegator_; -#else - std::unique_ptr> deconv2d_delegator_; - ref::BiasAdd bias_add_delegator_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr bias_add_delegator_; + std::unique_ptr depthwise_deconv2d_delegator_; }; #ifdef MACE_ENABLE_OPENCL @@ -251,7 +215,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry) { +void RegisterDepthwiseDeconv2d(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DepthwiseDeconv2d", DepthwiseDeconv2dOp, DeviceType::CPU, float); diff --git a/mace/ops/dynamic_lstm.cc b/mace/ops/dynamic_lstm.cc index fc226c08d112edea0e13d19cc44aa76c1432ea7f..014f23c00d41fe283bc21c23d17bb5b53825fdee 100644 --- a/mace/ops/dynamic_lstm.cc +++ b/mace/ops/dynamic_lstm.cc @@ -35,14 +35,13 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/lstm.h" +#include "mace/ops/delegator/gemv.h" #ifdef MACE_ENABLE_NEON #include -#include "mace/ops/arm/fp32/gemv.h" -#else -#include "mace/ops/ref/gemv.h" #endif // MACE_ENABLE_NEON namespace mace { @@ -73,7 +72,11 @@ class DynamicLSTMOp : public Operation { cell_cache_indexes_( Operation::GetRepeatedArgs("cell_cache_indexes")), out_cache_indexes_( - Operation::GetRepeatedArgs("out_cache_indexes")) {} + Operation::GetRepeatedArgs("out_cache_indexes")), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, T, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} inline void Validate() { const Tensor *input = this->Input(0); @@ -93,7 +96,7 @@ class DynamicLSTMOp : public Operation { ") and prev_out_delay(", prev_out_delay_, ") should be less than zero."); MACE_CHECK(prev_cell_delay_ % subsample_factor_ == 0 && - prev_out_delay_ % subsample_factor_ == 0, + prev_out_delay_ % subsample_factor_ == 0, "prev_cell_delay(", prev_cell_delay_, ") and prev_out_delay(", prev_out_delay_, ") should be multiples of subsample_factor(", @@ -190,8 +193,8 @@ class DynamicLSTMOp : public Operation { const index_t affine_a_out_dim = weights_a->dim(0); const index_t affine_a_depth = weights_a->dim(1); MACE_CHECK(affine_a_in_dim == affine_a_depth) - << "affine_a's input_dim:" << affine_a_in_dim - << "!=" << "affine_a's weights' depth:" << affine_a_depth << std::endl; + << "affine_a's input_dim:" << affine_a_in_dim + << "!=" << "affine_a's weights' depth:" << affine_a_depth << std::endl; const index_t lstm_input_dim = affine_a_out_dim + prev_cell_dim_; const index_t lstm_cell_dim = lstm_input_dim / 5; @@ -202,15 +205,15 @@ class DynamicLSTMOp : public Operation { lstm_cell_dim, ")."); MACE_CHECK(lstm_params->dim(0) == 3 && params_stride == lstm_cell_dim && lstm_cell_dim == prev_cell_dim_) - << " lstm params rows: " << lstm_params->dim(0) - << " params_stride: " << 
params_stride - << " != " << " cell_dim: " << lstm_cell_dim << std::endl; + << " lstm params rows: " << lstm_params->dim(0) + << " params_stride: " << params_stride + << " != " << " cell_dim: " << lstm_cell_dim << std::endl; const index_t affine_b_out_dim = weights_b->dim(0); const index_t affine_b_depth = weights_b->dim(1); const index_t affine_b_in_dim = lstm_cell_dim; MACE_CHECK(affine_b_in_dim == affine_b_depth) - << "affine_b's input_dim:" << affine_b_in_dim - << "!=" << "affine_b's weights' depth:" << affine_b_depth << std::endl; + << "affine_b's input_dim:" << affine_b_in_dim + << "!=" << "affine_b's weights' depth:" << affine_b_depth << std::endl; const index_t output_dim = affine_b_out_dim; MACE_CHECK(prev_out_offset_ + prev_out_dim_ <= output_dim) @@ -316,16 +319,16 @@ class DynamicLSTMOp : public Operation { prev_out_buf_data + i % out_buf_chunk * prev_out_dim_, prev_out_dim_ * sizeof(float)); // Affine - gemv_.Compute(context, - weights_a, - &affine_a_in, - bias_a, - 1, - affine_a_out_dim, - affine_a_depth, - false, - false, - &affine_a_out); + gemv_->Compute(context, + weights_a, + &affine_a_in, + bias_a, + 1, + affine_a_out_dim, + affine_a_depth, + false, + false, + &affine_a_out); // Prepare LSTMNonlinear input and output pointer float *lstm_cell_ptr = prev_cell_buf_data + i % cell_buf_chunk * prev_cell_dim_; @@ -343,16 +346,16 @@ class DynamicLSTMOp : public Operation { affine_b_in_data); UpdateCell(curr_cell_ptr, prev_cell_dim_, scale_); // Affine - gemv_.Compute(context, - weights_b, - &affine_b_in, - bias_b, - 1, - affine_b_out_dim, - affine_b_depth, - false, - false, - &affine_b_out); + gemv_->Compute(context, + weights_b, + &affine_b_in, + bias_b, + 1, + affine_b_out_dim, + affine_b_depth, + false, + false, + &affine_b_out); // Output memcpy(output_ptr, affine_b_out_data, @@ -404,18 +407,13 @@ class DynamicLSTMOp : public Operation { std::vector forward_indexes_; std::vector cell_cache_indexes_; std::vector out_cache_indexes_; - -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemv gemv_; -#else - ref::Gemv gemv_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemv_; MACE_OP_INPUT_TAGS(INPUT, PREV_OUT, PREV_CELL, WEIGHTS_A, PARAMS, WEIGHTS_B); MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_CACHE, CELL_CACHE); }; -void RegisterDynamicLSTM(OpRegistryBase *op_registry) { +void RegisterDynamicLSTM(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "DynamicLSTM", DynamicLSTMOp, DeviceType::CPU, float); } diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 7db7b6c0c6c59bebab78840c6316fb120908ed01..e4d5a74b9bf518e10de8d499924733e38edafff6 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifdef MACE_ENABLE_NEON #ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/arm/q8/eltwise.h" +#include "mace/ops/delegator/eltwise.h" #endif // MACE_ENABLE_QUANTIZE -#endif // MACE_ENABLE_NEON #include "mace/ops/eltwise.h" @@ -28,7 +26,8 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" @@ -1061,7 +1060,7 @@ class EltwiseOp : public Operation { }; #ifdef MACE_ENABLE_QUANTIZE -template <> +template<> class EltwiseOp : public Operation { public: explicit EltwiseOp(OpConstructContext *context) @@ -1071,12 +1070,15 @@ class EltwiseOp : public Operation { coeff_(Operation::GetRepeatedArgs("coeff")), scalar_input_(Operation::GetOptionalArg("scalar_input", 1.0)), scalar_input_index_(Operation::GetOptionalArg( - "scalar_input_index", 1)) -#ifdef MACE_ENABLE_NEON - , eltwise_(static_cast(Operation::GetOptionalArg( - "type", static_cast(ops::EltwiseType::NONE)))) -#endif - {} + "scalar_input_index", 1)), + eltwise_delegator_(delegator::Eltwise::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, MACE_CPU_IMPL_TYPE), + delegator::EltwiseParam( + static_cast( + Operation::GetOptionalArg( + "type", + static_cast(ops::EltwiseType::NONE)))))) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -1092,77 +1094,7 @@ class EltwiseOp : public Operation { MACE_CHECK(output->scale() != 0); MACE_RETURN_IF_ERROR(output->Resize(input0->shape())); -#ifdef MACE_ENABLE_NEON - eltwise_.Compute(context, input0, input1, output); -#else - constexpr int left_shift = 20; - const double doubled_scale = 2 * std::max(input0->scale(), input1->scale()); - const double adjusted_input0_scale = input0->scale() / doubled_scale; - const double adjusted_input1_scale = input1->scale() / doubled_scale; - const double adjusted_output_scale = - doubled_scale / ((1 << left_shift) * output->scale()); - - int32_t input0_multiplier; - int32_t input1_multiplier; - int32_t output_multiplier; - int32_t input0_shift; - int32_t input1_shift; - int32_t output_shift; - QuantizeMultiplier(adjusted_input0_scale, - &input0_multiplier, - &input0_shift); - QuantizeMultiplier(adjusted_input1_scale, - &input1_multiplier, - &input1_shift); - QuantizeMultiplier(adjusted_output_scale, - &output_multiplier, - &output_shift); - - Tensor::MappingGuard input0_guard(input0); - Tensor::MappingGuard input1_guard(input1); - Tensor::MappingGuard output_guard(output); - - auto input0_ptr = input0->data(); - auto input1_ptr = input1->data(); - auto output_ptr = output->mutable_data(); - - utils::ThreadPool - &thread_pool = context->device()->cpu_runtime()->thread_pool(); - thread_pool.Compute1D([=](index_t start, index_t end, index_t step) { - for (index_t i = start; i < end; i += step) { - const int32_t offset_input0 = input0_ptr[i] - input0->zero_point(); - const int32_t offset_input1 = input1_ptr[i] - input1->zero_point(); - const int32_t shifted_input0 = offset_input0 * (1 << left_shift); - const int32_t shifted_input1 = offset_input1 * (1 << left_shift); - const int32_t multiplied_input0 = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input0, - input0_multiplier), - -input0_shift); - const int32_t multiplied_input1 = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input1, - input1_multiplier), - -input1_shift); - - int32_t res; - if (type_ == 
SUM) { - res = multiplied_input0 + multiplied_input1; - } else { - res = multiplied_input0 - multiplied_input1; - } - - const int32_t output_val = - gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(res, - output_multiplier), - -output_shift) + output->zero_point(); - output_ptr[i] = Saturate(output_val); - } - }, 0, output->size(), 1); -#endif // NEON - - return MaceStatus::MACE_SUCCESS; + return eltwise_delegator_->Compute(context, input0, input1, output); } private: @@ -1171,9 +1103,7 @@ class EltwiseOp : public Operation { float scalar_input_; int32_t scalar_input_index_; Tensor scalar_tensor_; -#ifdef MACE_ENABLE_NEON - arm::q8::Eltwise eltwise_; -#endif + std::unique_ptr eltwise_delegator_; }; #endif // MACE_ENABLE_QUANTIZE @@ -1244,7 +1174,7 @@ class EltwiseOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterEltwise(OpRegistryBase *op_registry) { +void RegisterEltwise(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Eltwise", EltwiseOp, DeviceType::CPU, float); diff --git a/mace/ops/expand_dims.cc b/mace/ops/expand_dims.cc index 5474dd4bc26f50836271a2073be7e5f28f1f0ffe..cc3426c3cab7e27a3cb4965d362c147acaf7a428 100644 --- a/mace/ops/expand_dims.cc +++ b/mace/ops/expand_dims.cc @@ -13,7 +13,8 @@ // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -53,7 +54,7 @@ class ExpandDimsOp : public Operation { int axis_; }; -void RegisterExpandDims(OpRegistryBase *op_registry) { +void RegisterExpandDims(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, DeviceType::CPU, float); diff --git a/mace/ops/extract_pooling.cc b/mace/ops/extract_pooling.cc index 87264f4f66ff04c2bd0c17959450cf9add9532de..765fc58ebc6b4fb2c92286cc9651e2c239e04649 100644 --- a/mace/ops/extract_pooling.cc +++ b/mace/ops/extract_pooling.cc @@ -26,7 +26,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { @@ -176,7 +177,7 @@ class ExtractPoolingOp : public Operation { std::vector counts_; }; -void RegisterExtractPooling(OpRegistryBase *op_registry) { +void RegisterExtractPooling(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ExtractPooling", ExtractPoolingOp, DeviceType::CPU, float); } diff --git a/mace/ops/fill.cc b/mace/ops/fill.cc index 32a8595dcef36cf352722b38b4ef84e8a0f6ca34..0917674b18c854609617e4e6690c74542b23dc7e 100644 --- a/mace/ops/fill.cc +++ b/mace/ops/fill.cc @@ -13,7 +13,8 @@ // limitations under the License. 
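The non-NEON path removed from the quantized `EltwiseOp` above did its requantization inline: both inputs were brought to a common scale with a left shift and per-operand fixed-point multipliers, added or subtracted, then rescaled to the output's quantization parameters with gemmlowp's saturating helpers. That arithmetic now lives behind the `Eltwise` delegator. For readers who want the semantics without the fixed-point machinery, a plain floating-point reference of the same quantized addition (an illustrative stand-alone function, not MACE code):

```c++
#include <algorithm>
#include <cmath>
#include <cstdint>

// Reference semantics of quantized addition: dequantize, add, requantize.
// The removed block computed the same result with integer-only arithmetic
// (left shift, per-operand multipliers, rounding right shifts) for speed.
uint8_t QuantizedAdd(uint8_t a, float scale_a, int32_t zero_a,
                     uint8_t b, float scale_b, int32_t zero_b,
                     float scale_out, int32_t zero_out) {
  const float real = scale_a * (a - zero_a) + scale_b * (b - zero_b);
  const int32_t q =
      static_cast<int32_t>(std::lround(real / scale_out)) + zero_out;
  return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));
}
```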
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -61,7 +62,7 @@ class FillOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterFill(OpRegistryBase *op_registry) { +void RegisterFill(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Fill", FillOp, DeviceType::CPU, float); } diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index d863a2843a493d3186021d6621f226fc89689e7b..b037488837e679b8fbf47a8363f5e17c9d4bca42 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -17,22 +17,12 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/activation.h" - -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/gemv.h" -#include "mace/ops/arm/fp32/activation.h" - -#ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/arm/q8/gemv.h" -#endif // MACE_ENABLE_QUANTIZE - -#else -#include "mace/ops/ref/gemv.h" -#include "mace/ops/ref/activation.h" -#endif // MACE_ENABLE_NEON +#include "mace/ops/delegator/activation.h" +#include "mace/ops/delegator/gemv.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -71,9 +61,16 @@ class FullyConnectedOp : public FullyConnectedOpBase { public: explicit FullyConnectedOp(OpConstructContext *context) : FullyConnectedOpBase(context), - activation_delegator_(activation_, - relux_max_limit_, - leakyrelu_coefficient_) {} + activation_delegator_(delegator::Activation::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::ActivationParam(activation_, + relux_max_limit_, + leakyrelu_coefficient_))), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); @@ -100,30 +97,25 @@ class FullyConnectedOp : public FullyConnectedOpBase { const index_t input_size = weight->dim(1) * weight->dim(2) * weight->dim(3); const index_t output_size = weight->dim(0); - gemv_.Compute(context, - weight, - input, - bias, - batch, - output_size, - input_size, - false, - true, - output); + gemv_->Compute(context, + weight, + input, + bias, + batch, + output_size, + input_size, + false, + true, + output); - activation_delegator_.Compute(context, output, output); + activation_delegator_->Compute(context, output, output); return MaceStatus::MACE_SUCCESS; } private: -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemv gemv_; - arm::fp32::Activation activation_delegator_; -#else - ref::Gemv gemv_; - ref::Activation activation_delegator_; -#endif // MACE_ENABLE_NEON + std::unique_ptr activation_delegator_; + std::unique_ptr gemv_; }; #ifdef MACE_ENABLE_QUANTIZE @@ -132,7 +124,11 @@ class FullyConnectedOp : public FullyConnectedOpBase { public: explicit FullyConnectedOp(OpConstructContext *context) - : FullyConnectedOpBase(context) {} + : FullyConnectedOpBase(context), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { const Tensor *input = this->Input(INPUT); @@ -161,7 +157,7 @@ class FullyConnectedOp const int input_size = static_cast(weight->dim(1) * weight->dim(2) * weight->dim(3)); const int output_size = static_cast(weight->dim(0)); - 
gemv_.Compute(context, + gemv_->Compute(context, weight, input, bias, @@ -175,11 +171,7 @@ class FullyConnectedOp } private: -#ifdef MACE_ENABLE_NEON - ::mace::ops::arm::q8::Gemv gemv_; -#else - ref::Gemv gemv_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemv_; }; #endif // MACE_ENABLE_QUANTIZE @@ -231,7 +223,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterFullyConnected(OpRegistryBase *op_registry) { +void RegisterFullyConnected(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "FullyConnected", FullyConnectedOp, DeviceType::CPU, float); diff --git a/mace/ops/gather.cc b/mace/ops/gather.cc index 2114290b66ff8d2d256bc7e9dcce02b298331112..a112d91f94a24b9e8be455e727e8cf87f8c46e6c 100644 --- a/mace/ops/gather.cc +++ b/mace/ops/gather.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -85,7 +86,7 @@ class GatherOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterGather(OpRegistryBase *op_registry) { +void RegisterGather(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Gather", GatherOp, DeviceType::CPU, float); diff --git a/mace/ops/identity.cc b/mace/ops/identity.cc index 1c7a037ee2b8c1ec445b8c638958209cde7792f0..ac915cd848558300b8cd59770f663e0a2e856727 100644 --- a/mace/ops/identity.cc +++ b/mace/ops/identity.cc @@ -13,7 +13,8 @@ // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -32,7 +33,7 @@ class IdentityOp : public Operation { } }; -void RegisterIdentity(OpRegistryBase *op_registry) { +void RegisterIdentity(OpRegistry *op_registry) { MACE_REGISTER_OP_BY_CLASS(op_registry, "Identity", IdentityOp, DeviceType::CPU, float); MACE_REGISTER_OP_BY_CLASS(op_registry, "Identity", IdentityOp, diff --git a/mace/ops/ifdefined.cc b/mace/ops/ifdefined.cc index f0367d20f08d76250bb426da24d5882e6229ab48..84a2831609bec4a4c5ef455834f29812f30848ec 100644 --- a/mace/ops/ifdefined.cc +++ b/mace/ops/ifdefined.cc @@ -25,7 +25,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -162,7 +163,7 @@ class IfDefinedOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterIfDefined(OpRegistryBase *op_registry) { +void RegisterIfDefined(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "IfDefined", IfDefinedOp, DeviceType::CPU, float); } diff --git a/mace/ops/infer_conv2d_shape.cc b/mace/ops/infer_conv2d_shape.cc index fb7bfecc90ccb80d2cedaf321d65b207be988892..f29056fec26989b363e532440da982c55866e1eb 100644 --- a/mace/ops/infer_conv2d_shape.cc +++ b/mace/ops/infer_conv2d_shape.cc @@ -13,7 +13,8 @@ // limitations under the License. 
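The `FullyConnected` hunk above shows the other half of the pattern: instead of `#ifdef`-selected value members (`arm::fp32::Gemv` versus `ref::Gemv`), the op holds `std::unique_ptr`s to the abstract delegators and fills them in the constructor's initializer list, where `OpConstructContext` already exposes the workspace. In outline (the CPU/float template specialization syntax and `Run()` are elided):

```c++
class FullyConnectedOp : public FullyConnectedOpBase {  // CPU/float variant
 public:
  explicit FullyConnectedOp(OpConstructContext *context)
      : FullyConnectedOpBase(context),
        activation_delegator_(delegator::Activation::Create(
            context->workspace(),
            MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE),
            delegator::ActivationParam(activation_, relux_max_limit_,
                                       leakyrelu_coefficient_))),
        gemv_(delegator::Gemv::Create(
            context->workspace(),
            MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE),
            DelegatorParam())) {}

  // Run() composes them: gemv_->Compute(...), then
  // activation_delegator_->Compute(context, output, output).

 private:
  std::unique_ptr<delegator::Activation> activation_delegator_;
  std::unique_ptr<delegator::Gemv> gemv_;
};
```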
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/conv_pool_2d_util.h" namespace mace { @@ -101,7 +102,7 @@ class InferConv2dShapeOp : public Operation { } }; -void RegisterInferConv2dShape(OpRegistryBase *op_registry) { +void RegisterInferConv2dShape(OpRegistry *op_registry) { MACE_REGISTER_OP_BY_CLASS(op_registry, "InferConv2dShape", InferConv2dShapeOp, DeviceType::CPU, float); MACE_REGISTER_OP_BY_CLASS(op_registry, "InferConv2dShape", diff --git a/mace/ops/kaldi_batch_norm.cc b/mace/ops/kaldi_batch_norm.cc index 61c0340cc9abf41bbf224f60402b59d4241eadb7..ed05064faabe77db2feeef4f1fcb24a35fb5970c 100644 --- a/mace/ops/kaldi_batch_norm.cc +++ b/mace/ops/kaldi_batch_norm.cc @@ -19,7 +19,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -167,7 +168,7 @@ class KaldiBatchNormOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterKaldiBatchNorm(OpRegistryBase *op_registry) { +void RegisterKaldiBatchNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "KaldiBatchNorm", KaldiBatchNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/local_response_norm.cc b/mace/ops/local_response_norm.cc index 022ee3e7aa979ee36794f0fe6c4888012a0f0cb2..2ade126c8e7deba122dddfe4eff19d6b4bbc50bf 100644 --- a/mace/ops/local_response_norm.cc +++ b/mace/ops/local_response_norm.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -91,7 +92,7 @@ class LocalResponseNormOp : public Operation { float beta_; }; -void RegisterLocalResponseNorm(OpRegistryBase *op_registry) { +void RegisterLocalResponseNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LocalResponseNorm", LocalResponseNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/lpnorm.cc b/mace/ops/lpnorm.cc index 2c62ac194f688788502b1f8be19505ad87ab4402..a5c68a3575931911478461ffd802b16f5e8b79fb 100644 --- a/mace/ops/lpnorm.cc +++ b/mace/ops/lpnorm.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/lpnorm.h" @@ -147,7 +148,7 @@ class LpNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterLpNorm(OpRegistryBase *op_registry) { +void RegisterLpNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LpNorm", LpNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "LpNorm", LpNormOp); diff --git a/mace/ops/lstm_nonlinear.cc b/mace/ops/lstm_nonlinear.cc index fbf92c16e4361623d41dfbb50e704a4d8a81021e..c975ae62da40b549105bf936653e8ebaa07694c3 100644 --- a/mace/ops/lstm_nonlinear.cc +++ b/mace/ops/lstm_nonlinear.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/lstm.h" namespace mace { @@ -100,7 +101,7 @@ class LSTMNonlinearOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterLSTMNonlinear(OpRegistryBase *op_registry) { +void RegisterLSTMNonlinear(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "LSTMNonlinear", LSTMNonlinearOp, DeviceType::CPU, float); } diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 
1c97279e90f3ccd5792c1ea866729ef0842b9bb4..75e278708514aa94c1783bde7bd9bd228d46a242 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -19,25 +19,18 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" +#include "mace/ops/delegator/gemm.h" +#include "mace/ops/delegator/gemv.h" #include "mace/utils/math.h" -#ifdef MACE_ENABLE_NEON -#include "mace/ops/arm/fp32/gemm.h" -#include "mace/ops/arm/fp32/gemv.h" - #ifdef MACE_ENABLE_QUANTIZE +#include "mace/ops/common/gemmlowp_util.h" +#ifdef MACE_ENABLE_NEON #include "mace/ops/arm/q8/gemv.h" -#endif // MACE_ENABLE_QUANTIZE - -#else -#include "mace/ops/ref/gemm.h" -#include "mace/ops/ref/gemv.h" #endif // MACE_ENABLE_NEON - -#ifdef MACE_ENABLE_QUANTIZE -#include "mace/ops/common/gemmlowp_util.h" #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL @@ -103,7 +96,15 @@ template<> class MatMulOp : public MatMulOpBase { public: explicit MatMulOp(OpConstructContext *context) - : MatMulOpBase(context) {} + : MatMulOpBase(context), + gemm_(delegator::Gemm::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemm, CPU, float, MACE_CPU_IMPL_TYPE), + delegator::GemmParam())), + gemv_(delegator::Gemv::Create( + context->workspace(), + MACE_DELEGATOR_KEY(Gemv, CPU, float, MACE_CPU_IMPL_TYPE), + DelegatorParam())) {} MaceStatus Run(OpContext *context) override { Validate(); @@ -154,43 +155,43 @@ class MatMulOp : public MatMulOpBase { MACE_RETURN_IF_ERROR(C->Resize(output_shape)); if (rows == 1 && transpose_b_) { - return gemv_.Compute(context, - rhs, - lhs, - bias, - batch, - cols, - depth, - rhs_batched, - lhs_batched, - C); + return gemv_->Compute(context, + rhs, + lhs, + bias, + batch, + cols, + depth, + rhs_batched, + lhs_batched, + C); } else if (cols == 1 && !transpose_a_) { - return gemv_.Compute(context, - lhs, - rhs, - bias, - batch, - rows, - depth, - lhs_batched, - rhs_batched, - C); + return gemv_->Compute(context, + lhs, + rhs, + bias, + batch, + rows, + depth, + lhs_batched, + rhs_batched, + C); } else { context->device()->scratch_buffer()->Rewind(); - MaceStatus ret = gemm_.Compute(context, - lhs, - rhs, - batch, - lhs_rows, - lhs_cols, - rhs_rows, - rhs_cols, - transpose_a_, - transpose_b_, - false, - lhs_batched, - rhs_batched, - C); + MaceStatus ret = gemm_->Compute(context, + lhs, + rhs, + batch, + lhs_rows, + lhs_cols, + rhs_rows, + rhs_cols, + transpose_a_, + transpose_b_, + false, + lhs_batched, + rhs_batched, + C); if (bias != nullptr) { MACE_CHECK(bias->dim_size() == 1 && bias->dim(0) == cols, "bias' dim should be <= 2."); @@ -217,13 +218,8 @@ class MatMulOp : public MatMulOpBase { } private: -#ifdef MACE_ENABLE_NEON - arm::fp32::Gemm gemm_; - arm::fp32::Gemv gemv_; -#else - ref::Gemv gemv_; - ref::Gemm gemm_; -#endif // MACE_ENABLE_NEON + std::unique_ptr gemm_; + std::unique_ptr gemv_; }; #ifdef MACE_ENABLE_QUANTIZE @@ -234,6 +230,10 @@ class MatMulFixpointImpl; template class MatMulFixpointImpl { public: +#ifdef MACE_ENABLE_NEON + MatMulFixpointImpl() + : gemv_kernel_(DelegatorParam()) {} +#endif // MACE_ENABLE_NEON void operator()(OpContext *context, const Tensor *A, const Tensor *B, @@ -318,6 +318,10 @@ class MatMulFixpointImpl { template class MatMulFixpointImpl { public: +#ifdef MACE_ENABLE_NEON + MatMulFixpointImpl() + : gemv_kernel_(DelegatorParam()) {} +#endif // MACE_ENABLE_NEON void operator()(OpContext *context, const Tensor *A, const Tensor *B, @@ -592,7 +596,7 @@ class MatMulOp : public MatMulOpBase 
{ }; #endif // MACE_ENABLE_FP16_NEON -void RegisterMatMul(OpRegistryBase *op_registry) { +void RegisterMatMul(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "MatMul", MatMulOp, DeviceType::CPU, float); diff --git a/mace/ops/mvnorm.cc b/mace/ops/mvnorm.cc index ccb0018a5881d1341de931bead67a41a367af985..09d3bb9a3cf0239c68ad857e698c16d5d89439e7 100644 --- a/mace/ops/mvnorm.cc +++ b/mace/ops/mvnorm.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/mvnorm.h" @@ -165,7 +166,7 @@ class MVNormOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterMVNorm(OpRegistryBase *op_registry) { +void RegisterMVNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "MVNorm", MVNormOp, DeviceType::CPU, float); MACE_REGISTER_GPU_OP(op_registry, "MVNorm", MVNormOp); diff --git a/mace/ops/one_hot.cc b/mace/ops/one_hot.cc index 1596286af6ae4af96e5e7d01194fa5eff7e235a2..77d18bca3b7635b794c29d0b5a21ae7219876fad 100644 --- a/mace/ops/one_hot.cc +++ b/mace/ops/one_hot.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -144,7 +145,7 @@ class OneHotOp : public OneHotOpBase { }; -void RegisterOneHot(OpRegistryBase *op_registry) { +void RegisterOneHot(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float); } diff --git a/mace/ops/opencl/buffer/buffer_transform.h b/mace/ops/opencl/buffer/buffer_transform.h index 25415877e676707aab857fd09e81d4821ae99361..5b47bdc7403a222f3806e6309d12f868dacc3de4 100644 --- a/mace/ops/opencl/buffer/buffer_transform.h +++ b/mace/ops/opencl/buffer/buffer_transform.h @@ -19,7 +19,7 @@ #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/buffer_type_transform.cc b/mace/ops/opencl/buffer/buffer_type_transform.cc index 688ded664fa7dac533fd7fbafcfc7d1d8fbf9cdc..e86c460874552c3a8e8d56ee3eea13a1f3f73d1d 100644 --- a/mace/ops/opencl/buffer/buffer_type_transform.cc +++ b/mace/ops/opencl/buffer/buffer_type_transform.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc index 95c85b17dd24438a8c9bd45c974b7c23c46c85be..001c201d29281f66dbb8bc46c27b3a779114387b 100644 --- a/mace/ops/opencl/buffer/conv_2d_1x1.cc +++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
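The float `MatMulOp` above keeps both a `Gemm` and a `Gemv` delegator and chooses between them by output shape, so matrix-vector products never take the general GEMM path. The decision logic, stripped of the surrounding shape plumbing (bias handling in the GEMM branch is elided; in the real op the bias, when present, is added after the call):

```c++
// Inside MatMulOp<CPU, float>::Run(), after shapes have been validated:
// rows/cols describe the output C, depth is the shared inner dimension.
if (rows == 1 && transpose_b_) {
  // Output is a single row: run it as a matrix-vector product on rhs.
  return gemv_->Compute(context, rhs, lhs, bias, batch, cols, depth,
                        rhs_batched, lhs_batched, C);
} else if (cols == 1 && !transpose_a_) {
  // Output is a single column: run it as a matrix-vector product on lhs.
  return gemv_->Compute(context, lhs, rhs, bias, batch, rows, depth,
                        lhs_batched, rhs_batched, C);
} else {
  // General case: the Gemm delegator handles transposes and batching.
  return gemm_->Compute(context, lhs, rhs, batch, lhs_rows, lhs_cols,
                        rhs_rows, rhs_cols, transpose_a_, transpose_b_,
                        /*transpose_out=*/false, lhs_batched, rhs_batched, C);
}
```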
-#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc index 4c03ee2af0c5b5452878db16067fff114088884c..9e7d75089b03d6d45a4f293b80105e3c5ac6a2d3 100644 --- a/mace/ops/opencl/buffer/conv_2d_general.cc +++ b/mace/ops/opencl/buffer/conv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/reshape.cc b/mace/ops/opencl/buffer/reshape.cc index ae3c119c2368d4c57d2151a641472d508999151b..73f78777c948ddd4a8f536be2d54e03ab19e9679 100644 --- a/mace/ops/opencl/buffer/reshape.cc +++ b/mace/ops/opencl/buffer/reshape.cc @@ -16,7 +16,7 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" namespace mace { namespace ops { diff --git a/mace/ops/opencl/buffer/softmax.h b/mace/ops/opencl/buffer/softmax.h index 0acae465953c75fc6d053b8d6c90040a17f75818..5555ad61787a0145f282b75decf08813b08ffdb0 100644 --- a/mace/ops/opencl/buffer/softmax.h +++ b/mace/ops/opencl/buffer/softmax.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/buffer/utils.h b/mace/ops/opencl/buffer/utils.h index e68fcb4a274e28900a440454fb34cfacfa1e2941..10d0dea1314be85d45329d9b6bbf0f63b27ceb5b 100644 --- a/mace/ops/opencl/buffer/utils.h +++ b/mace/ops/opencl/buffer/utils.h @@ -16,7 +16,7 @@ #define MACE_OPS_OPENCL_BUFFER_UTILS_H_ #include "mace/core/future.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" diff --git a/mace/ops/opencl/buffer_transform.cc b/mace/ops/opencl/buffer_transform.cc index fc1d9dcc2c514d289baa3f56bced871723e778fc..1cacaccb0abc9694c7432f915e03b4954f3a350b 100644 --- a/mace/ops/opencl/buffer_transform.cc +++ b/mace/ops/opencl/buffer_transform.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/buffer_transformer.h" namespace mace { @@ -51,7 +52,7 @@ class BufferTransformOp : public Operation { MemoryType out_mem_type_; }; -void RegisterBufferTransform(OpRegistryBase *op_registry) { +void RegisterBufferTransform(OpRegistry *op_registry) { MACE_REGISTER_GPU_OP(op_registry, "BufferTransform", BufferTransformOp); } diff --git a/mace/ops/opencl/buffer_transformer.h b/mace/ops/opencl/buffer_transformer.h index 0dcec529674a12ea54c56342c4730aed0b244c99..987507de3b2e260dfe755807607f92f780eaabc7 100644 --- a/mace/ops/opencl/buffer_transformer.h +++ b/mace/ops/opencl/buffer_transformer.h @@ -19,7 +19,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/image/buffer_to_image.h" #include "mace/ops/opencl/image/image_to_buffer.h" #include "mace/ops/opencl/buffer/buffer_transform.h" diff --git a/mace/ops/opencl/image/activation.h 
b/mace/ops/opencl/image/activation.h index 929d267ddd2860161c45eb63b3be465e870298ed..bfbdc47c8cecb0dd10aa12dbcc17b6231fc9178a 100644 --- a/mace/ops/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/addn.h b/mace/ops/opencl/image/addn.h index 575dee22764af5e856ec19792f5fce60634f906b..ee2c526b486e0dc67bc584a0d1f732f6ee0aec30 100644 --- a/mace/ops/opencl/image/addn.h +++ b/mace/ops/opencl/image/addn.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h index 6b7773682ff546753b75f2f94f0fb2282a0b39fc..9a93b534188cd658322ce0fcda42a1d97419f611 100644 --- a/mace/ops/opencl/image/batch_norm.h +++ b/mace/ops/opencl/image/batch_norm.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/batch_to_space.h b/mace/ops/opencl/image/batch_to_space.h index a9d047aa2a7af096b535f2086afe9450beed46c3..ade029b6e2fb756ad9b0842ce29321785d6a751b 100644 --- a/mace/ops/opencl/image/batch_to_space.h +++ b/mace/ops/opencl/image/batch_to_space.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/bias_add.h b/mace/ops/opencl/image/bias_add.h index 67644d6a1f58b99fc0c3d4d2d4021c1e2e178adb..3430c81fd31bd0b92f7454ff8d24f2544aa9a6d4 100644 --- a/mace/ops/opencl/image/bias_add.h +++ b/mace/ops/opencl/image/bias_add.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/buffer_to_image.h b/mace/ops/opencl/image/buffer_to_image.h index 3389118279f3cdf7c8050e1bb5fd17c9e154530d..5a332f6a77d6220e7584b9658ea7416d7dd5b05e 100644 --- a/mace/ops/opencl/image/buffer_to_image.h +++ b/mace/ops/opencl/image/buffer_to_image.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/channel_shuffle.h b/mace/ops/opencl/image/channel_shuffle.h index 94448d805ccb86887bb1b9e12bce0cfba66db4a4..016b60e00bce11fa37c99b35c4ae8604f004013a 100644 --- a/mace/ops/opencl/image/channel_shuffle.h +++ b/mace/ops/opencl/image/channel_shuffle.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/concat.h b/mace/ops/opencl/image/concat.h index e5cd297779f7adb583653e31d25aa5816a377d4f..de9ee72fca7e1f5b8907f8f03dc0fdf64261c648 100644 --- a/mace/ops/opencl/image/concat.h +++ b/mace/ops/opencl/image/concat.h @@ -19,7 +19,7 @@ #include #include -#include 
"mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d.h b/mace/ops/opencl/image/conv_2d.h index 6044c1a7235535cc0f67dcdc716b25189ed7a3d4..1ecd913137891542c11117ee54f437877e655971 100644 --- a/mace/ops/opencl/image/conv_2d.h +++ b/mace/ops/opencl/image/conv_2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc index 494672a4447cf0ed9e8611e11a241f9cc1387816..2d4baa5bbcd5123a2542bb9db1cb1a871f7a6e9c 100644 --- a/mace/ops/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc index 8bfc988c8ebf4057b9a2942f632594d14cfcf7d0..c5ea2890751f9cf9ca5a7455d6abd35fe323f98e 100644 --- a/mace/ops/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc index 9964c5f25cba4b39e401ac39764bd6d29b6f62f1..b84d83949d26235da4c51a135f4965ca6f8cfe3a 100644 --- a/mace/ops/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/common/activation_type.h" diff --git a/mace/ops/opencl/image/crop.h b/mace/ops/opencl/image/crop.h index 33a5d2603e56e297b0c0271ad806009b38550a07..c6b9ca8a4425e9aa29a4d12388a4b9e91eebf0dd 100644 --- a/mace/ops/opencl/image/crop.h +++ b/mace/ops/opencl/image/crop.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h index 4f1db7e66fa4580690bd648c259543dce292083d..2ab385046f49ed629fa0b90d15b8d1b9416f5e59 100644 --- a/mace/ops/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depth_to_space.h b/mace/ops/opencl/image/depth_to_space.h index 383a4c6f3fed98c2b4cec5b36121004a78a0109f..ee56b6eae3152e3c13c76d7035a95193176608db 100644 --- a/mace/ops/opencl/image/depth_to_space.h +++ b/mace/ops/opencl/image/depth_to_space.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depthwise_conv2d.h b/mace/ops/opencl/image/depthwise_conv2d.h index c72170acdb1c15ebf27dbd327d64b5b73d40de2e..fc8833ddf6e842a6a6f4529822d7270457e76768 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.h +++ b/mace/ops/opencl/image/depthwise_conv2d.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/depthwise_deconv2d.h b/mace/ops/opencl/image/depthwise_deconv2d.h index fe039cb679c449f0d432b86531d17795cb3e83e6..4643a9c1f46bc50b7d3cafa3e93649854113617f 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.h +++ b/mace/ops/opencl/image/depthwise_deconv2d.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/eltwise.h b/mace/ops/opencl/image/eltwise.h index a9298cc6582e4e5f8d805c1a0d00f9f65e99de0b..38c3dfe61428e868a490895848be58c6c0e35543 100644 --- a/mace/ops/opencl/image/eltwise.h +++ b/mace/ops/opencl/image/eltwise.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/eltwise_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h index 010edcac9979c659e6d926e076d941d9fea426dd..46a93a6173a90e926a316a8f299df6b5e7f118ee 100644 --- a/mace/ops/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/activation_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff 
--git a/mace/ops/opencl/image/image_to_buffer.h b/mace/ops/opencl/image/image_to_buffer.h index 5d5c524884c0ccb6ce976ee8cd45d345c445e20d..f6484e2f48bc3f16ff34aec359352d0076d54458 100644 --- a/mace/ops/opencl/image/image_to_buffer.h +++ b/mace/ops/opencl/image/image_to_buffer.h @@ -19,7 +19,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/opencl/buffer_transform_kernel.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/lpnorm.h b/mace/ops/opencl/image/lpnorm.h index cac641125d99d4e93495d67b45e00f0f27bb3c7c..d500c66d14228d68e27f6cb415eb7dfa068af3b5 100644 --- a/mace/ops/opencl/image/lpnorm.h +++ b/mace/ops/opencl/image/lpnorm.h @@ -14,7 +14,7 @@ #ifndef MACE_OPS_OPENCL_IMAGE_LPNORM_H_ #define MACE_OPS_OPENCL_IMAGE_LPNORM_H_ -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/lstm_cell.h b/mace/ops/opencl/image/lstm_cell.h index 998d8147675c2dd1b3ade1b782055a86117aea83..f224ba07d95f469bfb1b7fe718e059a8eabcd498 100644 --- a/mace/ops/opencl/image/lstm_cell.h +++ b/mace/ops/opencl/image/lstm_cell.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/matmul.h b/mace/ops/opencl/image/matmul.h index 8ee05239b798d8c8b6f660fa4aea335ded3549b7..f9e3125d01449e476cb39a5c9055fe01754ba87b 100644 --- a/mace/ops/opencl/image/matmul.h +++ b/mace/ops/opencl/image/matmul.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/mvnorm.h b/mace/ops/opencl/image/mvnorm.h index f6e609d27240612a0c53141ce409790b6b826234..5752167e2157808b96e4da080b4bccbfa6d1934b 100644 --- a/mace/ops/opencl/image/mvnorm.h +++ b/mace/ops/opencl/image/mvnorm.h @@ -17,7 +17,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/pad.h b/mace/ops/opencl/image/pad.h index 3df88f34e31020a848ee34d9c958cf8bc0200b32..6c04c7c8a9c9b25dea9345450732784915959061 100644 --- a/mace/ops/opencl/image/pad.h +++ b/mace/ops/opencl/image/pad.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/ops/common/pad_type.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/pooling.h b/mace/ops/opencl/image/pooling.h index 5c0e14a52b544e65af82bfd05bcc2a939e9d2a1b..8f0e0c062f4be9b70febf6491a98ff68e057051d 100644 --- a/mace/ops/opencl/image/pooling.h +++ b/mace/ops/opencl/image/pooling.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/reduce.h b/mace/ops/opencl/image/reduce.h index 0dfb48b427a25df89e475e45873d0ec69197f95a..24e889d73384ee98e7db35a4a8045e297805a452 100644 --- a/mace/ops/opencl/image/reduce.h +++ 
b/mace/ops/opencl/image/reduce.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/common/reduce_type.h" diff --git a/mace/ops/opencl/image/reshape.h b/mace/ops/opencl/image/reshape.h index 60be5fe0272c8827ce95003613ba0e07ab025396..3ee6bf297ba88aa12d5482d1262af1da1dd6366e 100644 --- a/mace/ops/opencl/image/reshape.h +++ b/mace/ops/opencl/image/reshape.h @@ -20,7 +20,7 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/core/runtime/opencl/opencl_helper.h" #include "mace/ops/opencl/buffer_transform_kernel.h" diff --git a/mace/ops/opencl/image/resize_bicubic.h b/mace/ops/opencl/image/resize_bicubic.h index 5abc553974e0c3fb1a4c2056ec140baf70e736cd..aab813691e8ab4dbc4cc5c84c361135a8758936a 100644 --- a/mace/ops/opencl/image/resize_bicubic.h +++ b/mace/ops/opencl/image/resize_bicubic.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/resize_bilinear.h b/mace/ops/opencl/image/resize_bilinear.h index ca3602d33942da03de3aa3f3cb093513af74a324..a428a81395a6d9fdf130ad4ff48184052c6c3d6e 100644 --- a/mace/ops/opencl/image/resize_bilinear.h +++ b/mace/ops/opencl/image/resize_bilinear.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.h b/mace/ops/opencl/image/resize_nearest_neighbor.h index 8bb10d4b2fd56046a689beae3e9abb3f0671f05e..1092665ee2ef32a8b1443259d4fcb8215b70f443 100644 --- a/mace/ops/opencl/image/resize_nearest_neighbor.h +++ b/mace/ops/opencl/image/resize_nearest_neighbor.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/softmax.h b/mace/ops/opencl/image/softmax.h index 525f1edc51ee8bc7637a2c9c83ffa876d67ab4b2..1873cd164f13630c593dc14245f3a04befd470ee 100644 --- a/mace/ops/opencl/image/softmax.h +++ b/mace/ops/opencl/image/softmax.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/space_to_batch.h b/mace/ops/opencl/image/space_to_batch.h index 20777dc88453bc1746aab4e50c2c20f98babecec..f1001b2f7a7cfaf2de8cb9ad3fd4b131a3f46033 100644 --- a/mace/ops/opencl/image/space_to_batch.h +++ b/mace/ops/opencl/image/space_to_batch.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/space_to_depth.h b/mace/ops/opencl/image/space_to_depth.h index 661e09af222ebf8ae07082d4192878d8e4703f36..6abb330f8e199cabeadd97131879e1ccf09113ef 100644 --- a/mace/ops/opencl/image/space_to_depth.h +++ b/mace/ops/opencl/image/space_to_depth.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include 
"mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h index 20e1936207dca72126efba0a1b80a3bafa149012..0d2eaff260e280595017cdbe9752508452e314d6 100644 --- a/mace/ops/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -22,7 +22,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/sqrdiff_mean.h b/mace/ops/opencl/image/sqrdiff_mean.h index 5acddb2556946b42ad1062ce6ec8c7bcf255e2cf..740fc03658c3594445bee5d2320a127faca86d63 100644 --- a/mace/ops/opencl/image/sqrdiff_mean.h +++ b/mace/ops/opencl/image/sqrdiff_mean.h @@ -21,7 +21,7 @@ #include #include -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index fd7cdfe67f1f37b4f1701d77d28f0759829594dc..539b4cf4f8604261dbc79d8536e84bcc3f9596d0 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/ops/common/activation_type.h" #include "mace/ops/common/conv_pool_2d_util.h" #include "mace/core/runtime/opencl/opencl_helper.h" diff --git a/mace/ops/opencl/lstm_cell.cc b/mace/ops/opencl/lstm_cell.cc index ce45c84401f89d42762c8a2c2bccbb57c35c08e1..dbdc2650d875a4a0caa60aaab9495cece1b9c26b 100644 --- a/mace/ops/opencl/lstm_cell.cc +++ b/mace/ops/opencl/lstm_cell.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/image/lstm_cell.h" #include "mace/utils/memory.h" @@ -89,7 +90,7 @@ class LSTMCellOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterLSTMCell(OpRegistryBase *op_registry) { +void RegisterLSTMCell(OpRegistry *op_registry) { MACE_REGISTER_GPU_OP(op_registry, "LSTMCell", LSTMCellOp); } diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index 49784c10db2c999b07faffe927aa6d6ebb061746..b210f40e87f3acc1712b92acf5ed4d6a7a161e5f 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/pad_type.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/pad.h" @@ -198,7 +199,7 @@ class PadOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterPad(OpRegistryBase *op_registry) { +void RegisterPad(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Pad", PadOp, DeviceType::CPU, float); diff --git a/mace/ops/pad_context.cc b/mace/ops/pad_context.cc index 25117df2562cc8a6d45ef70929a919ce8f9da0de..02a8c4250922a59b4d72e273b6e3ad6a82913e76 100644 --- a/mace/ops/pad_context.cc +++ b/mace/ops/pad_context.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -83,7 +84,7 @@ class PadContextOp : public Operation { int right_context_; }; -void RegisterPadContext(OpRegistryBase 
*op_registry) { +void RegisterPadContext(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PadContext", PadContextOp, DeviceType::CPU, float); } diff --git a/mace/ops/pnorm.cc b/mace/ops/pnorm.cc index 1d0d6698604834fdd58fb390171d21d0976780ec..588e59745404b9252bda70e6e2ac0ef192a839f5 100644 --- a/mace/ops/pnorm.cc +++ b/mace/ops/pnorm.cc @@ -26,7 +26,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -132,7 +133,7 @@ class PNormOp : public Operation { int output_dim_; }; -void RegisterPNorm(OpRegistryBase *op_registry) { +void RegisterPNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PNorm", PNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 4d4247f2b7236a0a3270c7d30a413c2885ca8256..2d51c1c4c64eb1a2274c2c6fd44d1965a66242c5 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -22,7 +22,8 @@ #include #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/common/conv_pool_2d_util.h" @@ -510,7 +511,7 @@ class PoolingOp : public PoolingOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterPooling(OpRegistryBase *op_registry) { +void RegisterPooling(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Pooling", PoolingOp, DeviceType::CPU, float); diff --git a/mace/ops/prior_box.cc b/mace/ops/prior_box.cc index 62040d272d4eb7ba46ba8b6d3bc20db401f9c644..3598c98a8b98d882d82f89c9b1fc8063b3258a56 100644 --- a/mace/ops/prior_box.cc +++ b/mace/ops/prior_box.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -144,7 +145,7 @@ class PriorBoxOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterPriorBox(OpRegistryBase *op_registry) { +void RegisterPriorBox(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "PriorBox", PriorBoxOp, DeviceType::CPU, float); } diff --git a/mace/ops/reduce.cc b/mace/ops/reduce.cc index 7c34db3e6a06fe89f84a5d980afc9da67585d584..a81a602d9be90b2eece8f2ca96f93609b1317b78 100644 --- a/mace/ops/reduce.cc +++ b/mace/ops/reduce.cc @@ -19,7 +19,8 @@ #include "mace/ops/common/reduce_type.h" #include "mace/core/future.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/runtime/cpu/cpu_runtime.h" #include "mace/core/tensor.h" #ifdef MACE_ENABLE_OPENCL @@ -1032,7 +1033,7 @@ class ReduceOp : public ReduceOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterReduce(OpRegistryBase *op_registry) { +void RegisterReduce(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp, diff --git a/mace/ops/ref/activation.cc b/mace/ops/ref/activation.cc index 4e2e65dbe71ef5b0e243a2be7d7803028de1f8d8..da2ff26fabd940d0a5e1822df2d37486344cfcd7 100644 --- a/mace/ops/ref/activation.cc +++ b/mace/ops/ref/activation.cc @@ -13,18 +13,26 @@ // limitations under the License. 
#include -#include "mace/ops/ref/activation.h" + +#include "mace/ops/delegator/activation.h" namespace mace { namespace ops { namespace ref { -Activation::Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient) - : type_(type), - limit_(limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} +class Activation : public delegator::Activation { + public: + explicit Activation(const delegator::ActivationParam ¶m) + : delegator::Activation(param) {} + ~Activation() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + Tensor *output) override; + + private: + void DoActivation(const OpContext *context, const Tensor *input, + Tensor *output); +}; MaceStatus Activation::Compute(const OpContext *context, const Tensor *input, @@ -99,6 +107,9 @@ void Activation::DoActivation(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam, + MACE_DELEGATOR_KEY(Activation, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/activation.h b/mace/ops/ref/activation.h deleted file mode 100644 index 7ad986a50ceed14b021abf2a4d81f2bb7b336e19..0000000000000000000000000000000000000000 --- a/mace/ops/ref/activation.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_REF_ACTIVATION_H_ -#define MACE_OPS_REF_ACTIVATION_H_ - -#include "mace/core/op_context.h" -#include "mace/ops/common/activation_type.h" - -namespace mace { -namespace ops { -namespace ref { - -class Activation { - public: - explicit Activation(ActivationType type, - const float limit, - const float leakyrelu_coefficient); - ~Activation() = default; - - MaceStatus Compute( - const OpContext *context, - const Tensor *input, - Tensor *output); - - private: - void DoActivation(const OpContext *context, - const Tensor *input, - Tensor *output); - - ActivationType type_; - const float limit_; - const float leakyrelu_coefficient_; -}; - -} // namespace ref -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_REF_ACTIVATION_H_ diff --git a/mace/ops/ref/bias_add.cc b/mace/ops/ref/bias_add.cc index efc56f74b412814da9643eccd9e4ce459299c622..221c2d2e9cc9b00f6157bdedaa276db36fc4dba3 100644 --- a/mace/ops/ref/bias_add.cc +++ b/mace/ops/ref/bias_add.cc @@ -12,12 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/ops/ref/bias_add.h" +#include "mace/ops/delegator/bias_add.h" namespace mace { namespace ops { namespace ref { +class BiasAdd : public delegator::BiasAdd { + public: + explicit BiasAdd(const DelegatorParam ¶m) : delegator::BiasAdd(param) {} + ~BiasAdd() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output) override; + + private: + void AddBias(const OpContext *context, const Tensor *input, + const Tensor *bias, Tensor *output); +}; + MaceStatus BiasAdd::Compute(const OpContext *context, const Tensor *input, const Tensor *bias, @@ -71,6 +84,9 @@ void BiasAdd::AddBias(const OpContext *context, } } +MACE_REGISTER_DELEGATOR(registry, BiasAdd, DelegatorParam, + MACE_DELEGATOR_KEY(BiasAdd, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/conv_2d.cc b/mace/ops/ref/conv_2d.cc index 1c69ee9d72e98dbb357347ed2d4e10d971e1cb07..d90b7e2bcddb4f2bb8e5997637e4f189eb3c2ba7 100644 --- a/mace/ops/ref/conv_2d.cc +++ b/mace/ops/ref/conv_2d.cc @@ -109,6 +109,10 @@ MaceStatus Conv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef Conv2d Conv2dRef; +MACE_REGISTER_DELEGATOR(registry, Conv2dRef, delegator::Conv2dParam, + MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/conv_2d.h b/mace/ops/ref/conv_2d.h index 9a9fbb8f92363fed058d9a96929714c8870ab028..b241a58a179af6c485dc9ed916bb4f1c3dfae401 100644 --- a/mace/ops/ref/conv_2d.h +++ b/mace/ops/ref/conv_2d.h @@ -18,64 +18,41 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Conv2d { +class Conv2d : public delegator::Conv2d { public: - Conv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2d(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} ~Conv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; template<> -class Conv2d { +class Conv2d : public delegator::Conv2d { public: - Conv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit Conv2d(const delegator::Conv2dParam ¶m) + : delegator::Conv2d(param) {} ~Conv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/deconv_2d.cc b/mace/ops/ref/deconv_2d.cc index d06c6634548dfb079f615f01f9e394950a214059..d19a96d273cb99096d3d0bf4877d558b4edff780 100644 --- a/mace/ops/ref/deconv_2d.cc +++ b/mace/ops/ref/deconv_2d.cc 
@@ -162,6 +162,11 @@ MaceStatus Deconv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef Deconv2d Deconv2dRef; +MACE_REGISTER_DELEGATOR( + registry, Deconv2dRef, delegator::Deconv2dParam, + MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/deconv_2d.h b/mace/ops/ref/deconv_2d.h index a8ab6722b47037f2552faaea8d8cca5151f463ae..564ce7e7afdac1412ef2ddce8a20e2286ab7b3b0 100644 --- a/mace/ops/ref/deconv_2d.h +++ b/mace/ops/ref/deconv_2d.h @@ -18,28 +18,21 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Deconv2d { +class Deconv2d : public delegator::Deconv2d { public: - Deconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - framework_type_(framework_type) {} + explicit Deconv2d(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param) {} ~Deconv2d() = default; @@ -48,29 +41,14 @@ class Deconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const FrameworkType framework_type_; + Tensor *output) override; }; template<> -class Deconv2d { +class Deconv2d : public delegator::Deconv2d { public: - Deconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - framework_type_(framework_type) {} + explicit Deconv2d(const delegator::Deconv2dParam ¶m) + : delegator::Deconv2d(param) {} ~Deconv2d() = default; @@ -79,14 +57,7 @@ class Deconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const FrameworkType framework_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/depthwise_conv_2d.cc b/mace/ops/ref/depthwise_conv_2d.cc index bff950690d719103c31f4ddeb36a7cd934e256c3..03be506ce1e7ea36cb6a763db83c4f50bb0f1e0b 100644 --- a/mace/ops/ref/depthwise_conv_2d.cc +++ b/mace/ops/ref/depthwise_conv_2d.cc @@ -115,6 +115,11 @@ MaceStatus DepthwiseConv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef DepthwiseConv2d DepthwiseConv2dRef; +MACE_REGISTER_DELEGATOR( + registry, DepthwiseConv2dRef, delegator::DepthwiseConv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/depthwise_conv_2d.h b/mace/ops/ref/depthwise_conv_2d.h index 91a95192a43ba2cc97bc9cc08b9774e2fc6d0a8a..cc5a14ca433b62e9e50973e511551beab5dd5160 100644 --- a/mace/ops/ref/depthwise_conv_2d.h +++ b/mace/ops/ref/depthwise_conv_2d.h @@ -18,64 +18,41 @@ #include -#include "mace/public/mace.h" 
+#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_conv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class DepthwiseConv2d { +class DepthwiseConv2d : public delegator::DepthwiseConv2d { public: - DepthwiseConv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit DepthwiseConv2d(const delegator::DepthwiseConv2dParam ¶m) + : delegator::DepthwiseConv2d(param) {} ~DepthwiseConv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; template<> -class DepthwiseConv2d { +class DepthwiseConv2d : public delegator::DepthwiseConv2d { public: - DepthwiseConv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type) {} + explicit DepthwiseConv2d(const delegator::DepthwiseConv2dParam ¶m) + : delegator::DepthwiseConv2d(param) {} ~DepthwiseConv2d() {} MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/depthwise_deconv_2d.cc b/mace/ops/ref/depthwise_deconv_2d.cc index 63b3aa6959ef343ef226a671614626f73578ea53..badded160c49037dc0496a7cccaefe037459a8f0 100644 --- a/mace/ops/ref/depthwise_deconv_2d.cc +++ b/mace/ops/ref/depthwise_deconv_2d.cc @@ -302,6 +302,11 @@ MaceStatus GroupDeconv2d::Compute(const OpContext *context, return MaceStatus::MACE_SUCCESS; } +typedef DepthwiseDeconv2d DepthwiseDeconv2dRef; +MACE_REGISTER_DELEGATOR( + registry, DepthwiseDeconv2dRef, delegator::DepthwiseDeconv2dParam, + MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, REF, General)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/depthwise_deconv_2d.h b/mace/ops/ref/depthwise_deconv_2d.h index 5da7487192a3762e6219716969a826e3f602a85a..586f2627838c30bcb366a850f5b230af980cafca 100644 --- a/mace/ops/ref/depthwise_deconv_2d.h +++ b/mace/ops/ref/depthwise_deconv_2d.h @@ -18,63 +18,37 @@ #include -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/ops/delegator/depthwise_deconv_2d.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class GroupDeconv2d { +class GroupDeconv2d : public delegator::GroupDeconv2d { public: - GroupDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} + explicit 
GroupDeconv2d(const delegator::GroupDeconv2dParam ¶m) + : delegator::GroupDeconv2d(param) {} virtual ~GroupDeconv2d() = default; - virtual MaceStatus Compute( + MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - private: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const index_t group_; - const FrameworkType framework_type_; + Tensor *output) override; }; template class DepthwiseDeconv2d : public GroupDeconv2d { public: - DepthwiseDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : GroupDeconv2d(strides, - dilations, - paddings, - padding_type, - 0, - framework_type) {} + explicit DepthwiseDeconv2d(const delegator::DepthwiseDeconv2d ¶m) + : GroupDeconv2d(param) {} ~DepthwiseDeconv2d() = default; @@ -83,57 +57,30 @@ class DepthwiseDeconv2d : public GroupDeconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); + Tensor *output) override; }; template<> -class GroupDeconv2d { +class GroupDeconv2d : public delegator::GroupDeconv2d { public: - GroupDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const index_t group, - const FrameworkType framework_type) - : strides_(strides), - dilations_(dilations), - paddings_(paddings), - padding_type_(padding_type), - group_(group), - framework_type_(framework_type) {} + explicit GroupDeconv2d(const delegator::GroupDeconv2dParam ¶m) + : delegator::GroupDeconv2d(param) {} virtual ~GroupDeconv2d() = default; - virtual MaceStatus Compute( + MaceStatus Compute( const OpContext *context, const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); - - protected: - const std::vector strides_; - const std::vector dilations_; - const std::vector paddings_; - const Padding padding_type_; - const index_t group_; - const FrameworkType framework_type_; + Tensor *output) override; }; template<> class DepthwiseDeconv2d : public GroupDeconv2d { public: - DepthwiseDeconv2d(const std::vector &strides, - const std::vector &dilations, - const std::vector &paddings, - const Padding padding_type, - const FrameworkType framework_type) - : GroupDeconv2d(strides, - dilations, - paddings, - padding_type, - 0, - framework_type) {} + explicit DepthwiseDeconv2d(const delegator::DepthwiseDeconv2dParam ¶m) + : GroupDeconv2d(param) {} ~DepthwiseDeconv2d() = default; @@ -142,7 +89,7 @@ class DepthwiseDeconv2d : public GroupDeconv2d { const Tensor *input, const Tensor *filter, const Tensor *output_shape, - Tensor *output); + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/gemm.cc b/mace/ops/ref/gemm.cc index e9d13c91bd9cb2b67eff1d997c94ba5bd4dba8b3..956a7affbf22904b2ab6a023c5ed2756660fe765 100644 --- a/mace/ops/ref/gemm.cc +++ b/mace/ops/ref/gemm.cc @@ -111,6 +111,10 @@ MaceStatus Gemm::Compute(const OpContext *context, output); } +typedef Gemm GemmRef; +MACE_REGISTER_DELEGATOR(registry, GemmRef, delegator::GemmParam, + MACE_DELEGATOR_KEY(Gemm, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/gemm.h b/mace/ops/ref/gemm.h index bf1826ada55243e0abcba28eb9d0ca907fc87c45..b7b63fba856d862542f1afe4315990933c3271d2 100644 --- a/mace/ops/ref/gemm.h +++ b/mace/ops/ref/gemm.h @@ 
-16,19 +16,20 @@ #ifndef MACE_OPS_REF_GEMM_H_ #define MACE_OPS_REF_GEMM_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/common/matrix.h" +#include "mace/ops/delegator/gemm.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Gemm { +class Gemm : public delegator::Gemm { public: - Gemm() {} + explicit Gemm(const delegator::GemmParam ¶m) : delegator::Gemm(param) {} ~Gemm() {} MaceStatus Compute(const OpContext *context, const Tensor *lhs, @@ -42,13 +43,13 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemm { +class Gemm : public delegator::Gemm { public: - Gemm() {} + explicit Gemm(const delegator::GemmParam ¶m) : delegator::Gemm(param) {} ~Gemm() {} MaceStatus Compute(const OpContext *context, const Tensor *lhs, @@ -62,7 +63,7 @@ class Gemm { const MatrixMajor output_major, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; // Original matrix before transpose has row-major MaceStatus Compute( const OpContext *context, @@ -78,7 +79,7 @@ class Gemm { const bool transpose_out, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; } // namespace ref diff --git a/mace/ops/ref/gemv.cc b/mace/ops/ref/gemv.cc index bf0366f3ce8cab2c848172b511cdfb98d1cb9d27..350412c2f548b67d737bcffc924c36582866d05f 100644 --- a/mace/ops/ref/gemv.cc +++ b/mace/ops/ref/gemv.cc @@ -159,8 +159,16 @@ MaceStatus Gemv::Compute(const OpContext *context, } // b return MaceStatus::MACE_SUCCESS; } + +typedef Gemv GemvUint8Ref; +MACE_REGISTER_DELEGATOR(registry, GemvUint8Ref, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, Ref)) #endif // MACE_ENABLE_QUANTIZE +typedef Gemv GemvRef; +MACE_REGISTER_DELEGATOR(registry, GemvRef, DelegatorParam, + MACE_DELEGATOR_KEY(Gemv, CPU, float, REF)) + } // namespace ref } // namespace ops } // namespace mace diff --git a/mace/ops/ref/gemv.h b/mace/ops/ref/gemv.h index 7116b8fa81b214f6b3405aacc7ea18a18e449cf6..e14730bbd9556e0f14356c88e8276fcebd3ae5ec 100644 --- a/mace/ops/ref/gemv.h +++ b/mace/ops/ref/gemv.h @@ -16,18 +16,19 @@ #ifndef MACE_OPS_REF_GEMV_H_ #define MACE_OPS_REF_GEMV_H_ -#include "mace/public/mace.h" +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" +#include "mace/ops/delegator/gemv.h" +#include "mace/public/mace.h" namespace mace { namespace ops { namespace ref { template -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -40,13 +41,13 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -59,14 +60,14 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; #if defined(MACE_ENABLE_QUANTIZE) template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : 
delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -79,13 +80,13 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; template<> -class Gemv { +class Gemv : public delegator::Gemv { public: - Gemv() {} + explicit Gemv(const DelegatorParam ¶m) : delegator::Gemv(param) {} ~Gemv() {} // Always row-major after transpose MaceStatus Compute( @@ -98,7 +99,7 @@ class Gemv { const index_t lhs_width, const bool lhs_batched, const bool rhs_batched, - Tensor *output); + Tensor *output) override; }; #endif // MACE_ENABLE_QUANTIZE diff --git a/mace/ops/ref/q8/eltwise.cc b/mace/ops/ref/q8/eltwise.cc new file mode 100644 index 0000000000000000000000000000000000000000..220378e4e0e1fdf52d091abf4d974f92edb57eec --- /dev/null +++ b/mace/ops/ref/q8/eltwise.cc @@ -0,0 +1,116 @@ +// Copyright 2019 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "mace/ops/common/gemmlowp_util.h" +#include "mace/ops/delegator/eltwise.h" +#include "mace/utils/logging.h" + +namespace mace { +namespace ops { +namespace ref { +namespace q8 { + +class Eltwise : public delegator::Eltwise { + public: + explicit Eltwise(const delegator::EltwiseParam ¶m) + : delegator::Eltwise(param) {} + ~Eltwise() = default; + + MaceStatus Compute(const OpContext *context, const Tensor *input0, + const Tensor *input1, Tensor *output) override; +}; + +MaceStatus Eltwise::Compute(const OpContext *context, + const Tensor *input0, + const Tensor *input1, + Tensor *output) { + constexpr int left_shift = 20; + const double doubled_scale = 2 * std::max(input0->scale(), input1->scale()); + const double adjusted_input0_scale = input0->scale() / doubled_scale; + const double adjusted_input1_scale = input1->scale() / doubled_scale; + const double adjusted_output_scale = + doubled_scale / ((1 << left_shift) * output->scale()); + + int32_t input0_multiplier; + int32_t input1_multiplier; + int32_t output_multiplier; + int32_t input0_shift; + int32_t input1_shift; + int32_t output_shift; + QuantizeMultiplier(adjusted_input0_scale, + &input0_multiplier, + &input0_shift); + QuantizeMultiplier(adjusted_input1_scale, + &input1_multiplier, + &input1_shift); + QuantizeMultiplier(adjusted_output_scale, + &output_multiplier, + &output_shift); + + Tensor::MappingGuard input0_guard(input0); + Tensor::MappingGuard input1_guard(input1); + Tensor::MappingGuard output_guard(output); + + auto input0_ptr = input0->data(); + auto input1_ptr = input1->data(); + auto output_ptr = output->mutable_data(); + + utils::ThreadPool + &thread_pool = context->device()->cpu_runtime()->thread_pool(); + thread_pool.Compute1D([=](index_t start, index_t end, index_t step) { + for (index_t i = start; i < end; i += step) { + const int32_t offset_input0 = input0_ptr[i] - input0->zero_point(); + const int32_t offset_input1 = input1_ptr[i] - input1->zero_point(); + const int32_t shifted_input0 = 
offset_input0 * (1 << left_shift); + const int32_t shifted_input1 = offset_input1 * (1 << left_shift); + const int32_t multiplied_input0 = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input0, + input0_multiplier), + -input0_shift); + const int32_t multiplied_input1 = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(shifted_input1, + input1_multiplier), + -input1_shift); + + int32_t res; + if (type_ == SUM) { + res = multiplied_input0 + multiplied_input1; + } else { + res = multiplied_input0 - multiplied_input1; + } + + const int32_t output_val = + gemmlowp::RoundingDivideByPOT( + gemmlowp::SaturatingRoundingDoublingHighMul(res, + output_multiplier), + -output_shift) + output->zero_point(); + output_ptr[i] = Saturate(output_val); + } + }, 0, output->size(), 1); + + return MaceStatus::MACE_SUCCESS; +} + +MACE_REGISTER_DELEGATOR(registry, Eltwise, delegator::EltwiseParam, + MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, REF)) + +} // namespace q8 +} // namespace ref +} // namespace ops +} // namespace mace diff --git a/mace/ops/registry/op_delegators_registry.cc b/mace/ops/registry/op_delegators_registry.cc new file mode 100644 index 0000000000000000000000000000000000000000..a596878016b222f1606f39201d18b0a40653485f --- /dev/null +++ b/mace/ops/registry/op_delegators_registry.cc @@ -0,0 +1,170 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "mace/ops/registry/registry.h"
+
+namespace mace {
+namespace ops {
+
+namespace ref {
+extern void RegisterActivationDelegator(OpDelegatorRegistry *registry);
+extern void RegisterBiasAddDelegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dRefDelegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dRefDelegator(OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseConv2dRefDelegator(OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dRefDelegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterGemmRefDelegator(OpDelegatorRegistry *registry);
+extern void RegisterGemvRefDelegator(OpDelegatorRegistry *registry);
+
+#ifdef MACE_ENABLE_QUANTIZE
+namespace q8 {
+extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry);
+} // namespace q8
+extern void RegisterGemvUint8RefDelegator(OpDelegatorRegistry *registry);
+#endif // MACE_ENABLE_QUANTIZE
+} // namespace ref
+
+#ifdef MACE_ENABLE_NEON
+namespace arm {
+namespace fp32 {
+extern void RegisterActivationDelegator(OpDelegatorRegistry *registry);
+extern void RegisterBiasAddDelegator(OpDelegatorRegistry *registry);
+
+extern void RegisterConv2dK1x1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK1x7S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK7x1S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK1x15S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK15x1S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK3x3S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK3x3S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK3x3WinogradDelegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK5x5S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK7x7S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK7x7S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dK7x7S3Delegator(OpDelegatorRegistry *registry);
+extern void RegisterConv2dGeneralDelegator(OpDelegatorRegistry *registry);
+
+extern void RegisterDeconv2dK2x2S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dK2x2S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dK3x3S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dK3x3S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dK4x4S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dK4x4S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDeconv2dGeneralDelegator(OpDelegatorRegistry *registry);
+
+extern void RegisterDepthwiseConv2dK3x3S1Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseConv2dK3x3S2Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dK3x3S1Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dK3x3S2Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterGroupDeconv2dK3x3S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterGroupDeconv2dK3x3S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dK4x4S1Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dK4x4S2Delegator(
+    OpDelegatorRegistry *registry);
+extern void RegisterGroupDeconv2dK4x4S1Delegator(OpDelegatorRegistry *registry);
+extern void RegisterGroupDeconv2dK4x4S2Delegator(OpDelegatorRegistry *registry);
+extern void RegisterDepthwiseDeconv2dGeneralDelegator( + OpDelegatorRegistry *registry); +extern void RegisterGroupDeconv2dGeneralDelegator( + OpDelegatorRegistry *registry); + +extern void RegisterGemmDelegator(OpDelegatorRegistry *registry); +extern void RegisterGemvDelegator(OpDelegatorRegistry *registry); +} // namespace fp32 + +#ifdef MACE_ENABLE_QUANTIZE +namespace q8 { +extern void RegisterEltwiseDelegator(OpDelegatorRegistry *registry); +extern void RegisterGemvUint8Delegator(OpDelegatorRegistry *registry); +extern void RegisterGemvInt32Delegator(OpDelegatorRegistry *registry); +} // namespace q8 +#endif // MACE_ENABLE_QUANTIZE + +} // namespace arm +#endif // MACE_ENABLE_NEON + +void RegisterAllOpDelegators(OpDelegatorRegistry *registry) { + ref::RegisterActivationDelegator(registry); + ref::RegisterBiasAddDelegator(registry); + ref::RegisterConv2dRefDelegator(registry); + ref::RegisterDeconv2dRefDelegator(registry); + ref::RegisterDepthwiseConv2dRefDelegator(registry); + ref::RegisterDepthwiseDeconv2dRefDelegator(registry); + ref::RegisterGemmRefDelegator(registry); + ref::RegisterGemvRefDelegator(registry); + +#ifdef MACE_ENABLE_QUANTIZE + ref::q8::RegisterEltwiseDelegator(registry); + ref::RegisterGemvUint8RefDelegator(registry); +#endif // MACE_ENABLE_QUANTIZE + +#ifdef MACE_ENABLE_NEON + arm::fp32::RegisterActivationDelegator(registry); + arm::fp32::RegisterBiasAddDelegator(registry); + + arm::fp32::RegisterConv2dK1x1Delegator(registry); + arm::fp32::RegisterConv2dK1x7S1Delegator(registry); + arm::fp32::RegisterConv2dK7x1S1Delegator(registry); + arm::fp32::RegisterConv2dK1x15S1Delegator(registry); + arm::fp32::RegisterConv2dK15x1S1Delegator(registry); + arm::fp32::RegisterConv2dK3x3S1Delegator(registry); + arm::fp32::RegisterConv2dK3x3S2Delegator(registry); + arm::fp32::RegisterConv2dK3x3WinogradDelegator(registry); + arm::fp32::RegisterConv2dK5x5S1Delegator(registry); + arm::fp32::RegisterConv2dK7x7S1Delegator(registry); + arm::fp32::RegisterConv2dK7x7S2Delegator(registry); + arm::fp32::RegisterConv2dK7x7S3Delegator(registry); + arm::fp32::RegisterConv2dGeneralDelegator(registry); + + arm::fp32::RegisterDeconv2dK2x2S1Delegator(registry); + arm::fp32::RegisterDeconv2dK2x2S2Delegator(registry); + arm::fp32::RegisterDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDeconv2dK4x4S1Delegator(registry); + arm::fp32::RegisterDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterDeconv2dGeneralDelegator(registry); + + arm::fp32::RegisterDepthwiseConv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDepthwiseConv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK3x3S1Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK3x3S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK4x4S1Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK4x4S1Delegator(registry); + arm::fp32::RegisterGroupDeconv2dK4x4S2Delegator(registry); + arm::fp32::RegisterDepthwiseDeconv2dGeneralDelegator(registry); + arm::fp32::RegisterGroupDeconv2dGeneralDelegator(registry); + + arm::fp32::RegisterGemmDelegator(registry); + arm::fp32::RegisterGemvDelegator(registry); + +#ifdef MACE_ENABLE_QUANTIZE + arm::q8::RegisterEltwiseDelegator(registry); + arm::q8::RegisterGemvUint8Delegator(registry); + 
arm::q8::RegisterGemvInt32Delegator(registry); +#endif // MACE_ENABLE_QUANTIZE + +#endif // MACE_ENABLE_NEON +} + +} // namespace ops +} // namespace mace diff --git a/mace/ops/registry/ops_registry.cc b/mace/ops/registry/ops_registry.cc index eafa78ceb876549fff28cd2eb48df719ff3a17e9..2f6e8c73e8d424d709ca1dcab43981daf3c0a151 100644 --- a/mace/ops/registry/ops_registry.cc +++ b/mace/ops/registry/ops_registry.cc @@ -1,4 +1,4 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. +// Copyright 2020 The MACE Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,167 +12,167 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/ops/registry/ops_registry.h" +#include "mace/ops/registry/registry.h" namespace mace { namespace ops { // Keep in lexicographical order -extern void RegisterActivation(OpRegistryBase *op_registry); -extern void RegisterAddN(OpRegistryBase *op_registry); -extern void RegisterArgMax(OpRegistryBase *op_registry); -extern void RegisterBatchNorm(OpRegistryBase *op_registry); -extern void RegisterBatchToSpaceND(OpRegistryBase *op_registry); -extern void RegisterBiasAdd(OpRegistryBase *op_registry); -extern void RegisterCast(OpRegistryBase *op_registry); -extern void RegisterChannelShuffle(OpRegistryBase *op_registry); -extern void RegisterConcat(OpRegistryBase *op_registry); -extern void RegisterConv2D(OpRegistryBase *op_registry); -extern void RegisterCrop(OpRegistryBase *op_registry); -extern void RegisterCumsum(OpRegistryBase *op_registry); -extern void RegisterDeconv2D(OpRegistryBase *op_registry); -extern void RegisterDepthToSpace(OpRegistryBase *op_registry); -extern void RegisterDepthwiseConv2d(OpRegistryBase *op_registry); -extern void RegisterDepthwiseDeconv2d(OpRegistryBase *op_registry); -extern void RegisterDynamicLSTM(OpRegistryBase *op_registry); -extern void RegisterEltwise(OpRegistryBase *op_registry); -extern void RegisterExpandDims(OpRegistryBase *op_registry); -extern void RegisterExtractPooling(OpRegistryBase *op_registry); -extern void RegisterFill(OpRegistryBase *op_registry); -extern void RegisterFullyConnected(OpRegistryBase *op_registry); -extern void RegisterGather(OpRegistryBase *op_registry); -extern void RegisterIdentity(OpRegistryBase *op_registry); -extern void RegisterIfDefined(OpRegistryBase *op_registry); -extern void RegisterInferConv2dShape(OpRegistryBase *op_registry); -extern void RegisterKaldiBatchNorm(OpRegistryBase *op_registry); -extern void RegisterLocalResponseNorm(OpRegistryBase *op_registry); -extern void RegisterLpNorm(OpRegistryBase *op_registry); -extern void RegisterLSTMNonlinear(OpRegistryBase *op_registry); -extern void RegisterMatMul(OpRegistryBase *op_registry); -extern void RegisterMVNorm(OpRegistryBase *op_registry); -extern void RegisterOneHot(OpRegistryBase *op_registry); -extern void RegisterPad(OpRegistryBase *op_registry); -extern void RegisterPadContext(OpRegistryBase *op_registry); -extern void RegisterPNorm(OpRegistryBase *op_registry); -extern void RegisterPooling(OpRegistryBase *op_registry); -extern void RegisterReduce(OpRegistryBase *op_registry); -extern void RegisterReplaceIndex(OpRegistryBase *op_registry); -extern void RegisterPriorBox(OpRegistryBase *op_registry); -extern void RegisterReshape(OpRegistryBase *op_registry); -extern void RegisterResizeBicubic(OpRegistryBase *op_registry); -extern void 
RegisterResizeBilinear(OpRegistryBase *op_registry); -extern void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry); -extern void RegisterReverse(OpRegistryBase *op_registry); -extern void RegisterScalarMath(OpRegistryBase *op_registry); -extern void RegisterSelect(OpRegistryBase *op_registry); -extern void RegisterShape(OpRegistryBase *op_registry); -extern void RegisterSlice(OpRegistryBase *op_registry); -extern void RegisterSoftmax(OpRegistryBase *op_registry); -extern void RegisterSpaceToBatchND(OpRegistryBase *op_registry); -extern void RegisterSpaceToDepth(OpRegistryBase *op_registry); -extern void RegisterSplice(OpRegistryBase *op_registry); -extern void RegisterSplit(OpRegistryBase *op_registry); -extern void RegisterSqrDiffMean(OpRegistryBase *op_registry); -extern void RegisterSqueeze(OpRegistryBase *op_registry); -extern void RegisterStack(OpRegistryBase *op_registry); -extern void RegisterStridedSlice(OpRegistryBase *op_registry); -extern void RegisterSubsample(OpRegistryBase *op_registry); -extern void RegisterSumGroup(OpRegistryBase *op_registry); -extern void RegisterTargetRMSNorm(OpRegistryBase *op_registry); -extern void RegisterTile(OpRegistryBase *op_registry); -extern void RegisterTranspose(OpRegistryBase *op_registry); -extern void RegisterUnstack(OpRegistryBase *op_registry); -extern void RegisterUnsqueeze(OpRegistryBase *op_registry); +extern void RegisterActivation(OpRegistry *op_registry); +extern void RegisterAddN(OpRegistry *op_registry); +extern void RegisterArgMax(OpRegistry *op_registry); +extern void RegisterBatchNorm(OpRegistry *op_registry); +extern void RegisterBatchToSpaceND(OpRegistry *op_registry); +extern void RegisterBiasAdd(OpRegistry *op_registry); +extern void RegisterCast(OpRegistry *op_registry); +extern void RegisterChannelShuffle(OpRegistry *op_registry); +extern void RegisterConcat(OpRegistry *op_registry); +extern void RegisterConv2D(OpRegistry *op_registry); +extern void RegisterCrop(OpRegistry *op_registry); +extern void RegisterCumsum(OpRegistry *op_registry); +extern void RegisterDeconv2D(OpRegistry *op_registry); +extern void RegisterDepthToSpace(OpRegistry *op_registry); +extern void RegisterDepthwiseConv2d(OpRegistry *op_registry); +extern void RegisterDepthwiseDeconv2d(OpRegistry *op_registry); +extern void RegisterDynamicLSTM(OpRegistry *op_registry); +extern void RegisterEltwise(OpRegistry *op_registry); +extern void RegisterExpandDims(OpRegistry *op_registry); +extern void RegisterExtractPooling(OpRegistry *op_registry); +extern void RegisterFill(OpRegistry *op_registry); +extern void RegisterFullyConnected(OpRegistry *op_registry); +extern void RegisterGather(OpRegistry *op_registry); +extern void RegisterIdentity(OpRegistry *op_registry); +extern void RegisterIfDefined(OpRegistry *op_registry); +extern void RegisterInferConv2dShape(OpRegistry *op_registry); +extern void RegisterKaldiBatchNorm(OpRegistry *op_registry); +extern void RegisterLocalResponseNorm(OpRegistry *op_registry); +extern void RegisterLpNorm(OpRegistry *op_registry); +extern void RegisterLSTMNonlinear(OpRegistry *op_registry); +extern void RegisterMatMul(OpRegistry *op_registry); +extern void RegisterMVNorm(OpRegistry *op_registry); +extern void RegisterOneHot(OpRegistry *op_registry); +extern void RegisterPad(OpRegistry *op_registry); +extern void RegisterPadContext(OpRegistry *op_registry); +extern void RegisterPNorm(OpRegistry *op_registry); +extern void RegisterPooling(OpRegistry *op_registry); +extern void RegisterReduce(OpRegistry *op_registry); 
+extern void RegisterReplaceIndex(OpRegistry *op_registry); +extern void RegisterPriorBox(OpRegistry *op_registry); +extern void RegisterReshape(OpRegistry *op_registry); +extern void RegisterResizeBicubic(OpRegistry *op_registry); +extern void RegisterResizeBilinear(OpRegistry *op_registry); +extern void RegisterResizeNearestNeighbor(OpRegistry *op_registry); +extern void RegisterReverse(OpRegistry *op_registry); +extern void RegisterScalarMath(OpRegistry *op_registry); +extern void RegisterSelect(OpRegistry *op_registry); +extern void RegisterShape(OpRegistry *op_registry); +extern void RegisterSlice(OpRegistry *op_registry); +extern void RegisterSoftmax(OpRegistry *op_registry); +extern void RegisterSpaceToBatchND(OpRegistry *op_registry); +extern void RegisterSpaceToDepth(OpRegistry *op_registry); +extern void RegisterSplice(OpRegistry *op_registry); +extern void RegisterSplit(OpRegistry *op_registry); +extern void RegisterSqrDiffMean(OpRegistry *op_registry); +extern void RegisterSqueeze(OpRegistry *op_registry); +extern void RegisterStack(OpRegistry *op_registry); +extern void RegisterStridedSlice(OpRegistry *op_registry); +extern void RegisterSubsample(OpRegistry *op_registry); +extern void RegisterSumGroup(OpRegistry *op_registry); +extern void RegisterTargetRMSNorm(OpRegistry *op_registry); +extern void RegisterTile(OpRegistry *op_registry); +extern void RegisterTranspose(OpRegistry *op_registry); +extern void RegisterUnstack(OpRegistry *op_registry); +extern void RegisterUnsqueeze(OpRegistry *op_registry); #ifdef MACE_ENABLE_QUANTIZE -extern void RegisterDequantize(OpRegistryBase *op_registry); -extern void RegisterQuantize(OpRegistryBase *op_registry); +extern void RegisterDequantize(OpRegistry *op_registry); +extern void RegisterQuantize(OpRegistry *op_registry); #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL -extern void RegisterBufferTransform(OpRegistryBase *op_registry); -extern void RegisterLSTMCell(OpRegistryBase *op_registry); +extern void RegisterBufferTransform(OpRegistry *op_registry); +extern void RegisterLSTMCell(OpRegistry *op_registry); #endif // MACE_ENABLE_OPENCL -} // namespace ops -OpRegistry::OpRegistry() : OpRegistryBase() { +void RegisterAllOps(OpRegistry *registry) { // Keep in lexicographical order - ops::RegisterActivation(this); - ops::RegisterAddN(this); - ops::RegisterArgMax(this); - ops::RegisterBatchNorm(this); - ops::RegisterBatchToSpaceND(this); - ops::RegisterBiasAdd(this); - ops::RegisterCast(this); - ops::RegisterChannelShuffle(this); - ops::RegisterConcat(this); - ops::RegisterConv2D(this); - ops::RegisterCrop(this); - ops::RegisterCumsum(this); - ops::RegisterDeconv2D(this); - ops::RegisterDepthToSpace(this); - ops::RegisterDepthwiseConv2d(this); - ops::RegisterDepthwiseDeconv2d(this); - ops::RegisterDynamicLSTM(this); - ops::RegisterEltwise(this); - ops::RegisterExpandDims(this); - ops::RegisterExtractPooling(this); - ops::RegisterFill(this); - ops::RegisterFullyConnected(this); - ops::RegisterGather(this); - ops::RegisterIdentity(this); - ops::RegisterIfDefined(this); - ops::RegisterInferConv2dShape(this); - ops::RegisterKaldiBatchNorm(this); - ops::RegisterLocalResponseNorm(this); - ops::RegisterLpNorm(this); - ops::RegisterLSTMNonlinear(this); - ops::RegisterMatMul(this); - ops::RegisterMVNorm(this); - ops::RegisterOneHot(this); - ops::RegisterPad(this); - ops::RegisterPadContext(this); - ops::RegisterPNorm(this); - ops::RegisterPooling(this); - ops::RegisterReduce(this); - ops::RegisterReplaceIndex(this); - 
ops::RegisterPriorBox(this); - ops::RegisterReshape(this); - ops::RegisterResizeBicubic(this); - ops::RegisterResizeBilinear(this); - ops::RegisterResizeNearestNeighbor(this); - ops::RegisterReverse(this); - ops::RegisterScalarMath(this); - ops::RegisterSelect(this); - ops::RegisterShape(this); - ops::RegisterSlice(this); - ops::RegisterSoftmax(this); - ops::RegisterSpaceToBatchND(this); - ops::RegisterSpaceToDepth(this); - ops::RegisterSplice(this); - ops::RegisterSplit(this); - ops::RegisterStack(this); - ops::RegisterStridedSlice(this); - ops::RegisterSqrDiffMean(this); - ops::RegisterSqueeze(this); - ops::RegisterSubsample(this); - ops::RegisterSumGroup(this); - ops::RegisterTargetRMSNorm(this); - ops::RegisterTile(this); - ops::RegisterTranspose(this); - ops::RegisterUnstack(this); - ops::RegisterUnsqueeze(this); + ops::RegisterActivation(registry); + ops::RegisterAddN(registry); + ops::RegisterArgMax(registry); + ops::RegisterBatchNorm(registry); + ops::RegisterBatchToSpaceND(registry); + ops::RegisterBiasAdd(registry); + ops::RegisterCast(registry); + ops::RegisterChannelShuffle(registry); + ops::RegisterConcat(registry); + ops::RegisterConv2D(registry); + ops::RegisterCrop(registry); + ops::RegisterCumsum(registry); + ops::RegisterDeconv2D(registry); + ops::RegisterDepthToSpace(registry); + ops::RegisterDepthwiseConv2d(registry); + ops::RegisterDepthwiseDeconv2d(registry); + ops::RegisterDynamicLSTM(registry); + ops::RegisterEltwise(registry); + ops::RegisterExpandDims(registry); + ops::RegisterExtractPooling(registry); + ops::RegisterFill(registry); + ops::RegisterFullyConnected(registry); + ops::RegisterGather(registry); + ops::RegisterIdentity(registry); + ops::RegisterIfDefined(registry); + ops::RegisterInferConv2dShape(registry); + ops::RegisterKaldiBatchNorm(registry); + ops::RegisterLocalResponseNorm(registry); + ops::RegisterLpNorm(registry); + ops::RegisterLSTMNonlinear(registry); + ops::RegisterMatMul(registry); + ops::RegisterMVNorm(registry); + ops::RegisterOneHot(registry); + ops::RegisterPad(registry); + ops::RegisterPadContext(registry); + ops::RegisterPNorm(registry); + ops::RegisterPooling(registry); + ops::RegisterReduce(registry); + ops::RegisterReplaceIndex(registry); + ops::RegisterPriorBox(registry); + ops::RegisterReshape(registry); + ops::RegisterResizeBicubic(registry); + ops::RegisterResizeBilinear(registry); + ops::RegisterResizeNearestNeighbor(registry); + ops::RegisterReverse(registry); + ops::RegisterScalarMath(registry); + ops::RegisterSelect(registry); + ops::RegisterShape(registry); + ops::RegisterSlice(registry); + ops::RegisterSoftmax(registry); + ops::RegisterSpaceToBatchND(registry); + ops::RegisterSpaceToDepth(registry); + ops::RegisterSplice(registry); + ops::RegisterSplit(registry); + ops::RegisterStack(registry); + ops::RegisterStridedSlice(registry); + ops::RegisterSqrDiffMean(registry); + ops::RegisterSqueeze(registry); + ops::RegisterSubsample(registry); + ops::RegisterSumGroup(registry); + ops::RegisterTargetRMSNorm(registry); + ops::RegisterTile(registry); + ops::RegisterTranspose(registry); + ops::RegisterUnstack(registry); + ops::RegisterUnsqueeze(registry); #ifdef MACE_ENABLE_QUANTIZE - ops::RegisterDequantize(this); - ops::RegisterQuantize(this); + ops::RegisterDequantize(registry); + ops::RegisterQuantize(registry); #endif // MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_OPENCL - ops::RegisterBufferTransform(this); - ops::RegisterLSTMCell(this); + ops::RegisterBufferTransform(registry); + ops::RegisterLSTMCell(registry); #endif // 
MACE_ENABLE_OPENCL } +} // namespace ops } // namespace mace diff --git a/mace/ops/registry/ops_registry.h b/mace/ops/registry/registry.h similarity index 68% rename from mace/ops/registry/ops_registry.h rename to mace/ops/registry/registry.h index 01f013dc4c7d334be68b6d42a6a7abaa0c41e7a0..ed8d55f42297ed064450e393c68c8a43ce9f4dd7 100644 --- a/mace/ops/registry/ops_registry.h +++ b/mace/ops/registry/registry.h @@ -12,19 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef MACE_OPS_REGISTRY_OPS_REGISTRY_H_ -#define MACE_OPS_REGISTRY_OPS_REGISTRY_H_ - -#include "mace/core/operator.h" +#ifndef MACE_OPS_REGISTRY_REGISTRY_H_ +#define MACE_OPS_REGISTRY_REGISTRY_H_ namespace mace { +class OpRegistry; +class OpDelegatorRegistry; + +namespace ops { -class OpRegistry : public OpRegistryBase { - public: - OpRegistry(); - ~OpRegistry() = default; -}; +void RegisterAllOps(OpRegistry *registry); +void RegisterAllOpDelegators(OpDelegatorRegistry *registry); +} // namespace ops } // namespace mace -#endif // MACE_OPS_REGISTRY_OPS_REGISTRY_H_ +#endif // MACE_OPS_REGISTRY_REGISTRY_H_ diff --git a/mace/ops/replace_index.cc b/mace/ops/replace_index.cc index d4f95323f84b70815ed7850c8593cd8d7f40c4a3..8b2f76db8ad9b133530e010935343f7eadbc7bad 100644 --- a/mace/ops/replace_index.cc +++ b/mace/ops/replace_index.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -94,7 +95,7 @@ class ReplaceIndexOp : public Operation { std::vector forward_indexes_; }; -void RegisterReplaceIndex(OpRegistryBase *op_registry) { +void RegisterReplaceIndex(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ReplaceIndex", ReplaceIndexOp, DeviceType::CPU, float); } diff --git a/mace/ops/reshape.cc b/mace/ops/reshape.cc index b5daa4301f5ceb036939d04da3767bb685ad9566..63c91c2e3ad0d4035844b4d18ea75f2e3285579d 100644 --- a/mace/ops/reshape.cc +++ b/mace/ops/reshape.cc @@ -14,7 +14,8 @@ #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" #ifdef MACE_ENABLE_OPENCL @@ -149,7 +150,7 @@ class ReshapeOp : public Operation { }; #endif -void RegisterReshape(OpRegistryBase *op_registry) { +void RegisterReshape(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Reshape", ReshapeOp, DeviceType::CPU, int32_t); MACE_REGISTER_GPU_OP(op_registry, "Reshape", ReshapeOp); diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 5e48ad392e9c46269187b632f5d19c1c058ef081..d5d25eda194c373e6271de01c54db796f18a833e 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -17,7 +17,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_bicubic.h" @@ -232,7 +233,7 @@ class ResizeBicubicOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeBicubic(OpRegistryBase *op_registry) { +void RegisterResizeBicubic(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeBicubic", ResizeBicubicOp, DeviceType::CPU, float); diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 
e209864f15f1d18da6e6f96353f68e257252812e..2fa891d1bb39016a5da3aff565d27ab78296c357 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/memory.h" #include "mace/core/quantize.h" #include "mace/ops/common/utils.h" @@ -366,7 +367,7 @@ class ResizeBilinearOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeBilinear(OpRegistryBase *op_registry) { +void RegisterResizeBilinear(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeBilinear", ResizeBilinearOp, DeviceType::CPU, float); diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc index 6ac6b9e7157684805a7faf5a45ce9be169ba2af3..201c4515878ec4872e45e8fb7cc6fb23b53cd43d 100644 --- a/mace/ops/resize_nearest_neighbor.cc +++ b/mace/ops/resize_nearest_neighbor.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_nearest_neighbor.h" @@ -172,7 +173,7 @@ class ResizeNearestNeighborOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry) { +void RegisterResizeNearestNeighbor(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor", ResizeNearestNeighborOp, DeviceType::CPU, float); diff --git a/mace/ops/reverse.cc b/mace/ops/reverse.cc index df3fe6f09ceb2e522c1ec330ba1736076e9e92d6..af9670e34563ab506c15e4c2317091d9ad864e91 100644 --- a/mace/ops/reverse.cc +++ b/mace/ops/reverse.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -72,7 +73,7 @@ class ReverseOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterReverse(OpRegistryBase *op_registry) { +void RegisterReverse(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Reverse", ReverseOp, DeviceType::CPU, float); } diff --git a/mace/ops/scalar_math.cc b/mace/ops/scalar_math.cc index 07794065dbf678ccce6fe1c808240ce6508a4df7..1c2734205c0898e5216adeb0c7370ab73f773588 100644 --- a/mace/ops/scalar_math.cc +++ b/mace/ops/scalar_math.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/eltwise.h" namespace mace { @@ -154,7 +155,7 @@ class ScalarMathOp : public Operation { int32_t scalar_input_index_; }; -void RegisterScalarMath(OpRegistryBase *op_registry) { +void RegisterScalarMath(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "ScalarMath", ScalarMathOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "ScalarMath", ScalarMathOp, diff --git a/mace/ops/select.cc b/mace/ops/select.cc index 4d094e651eea8e0113786ee078d4a3c04c8660e0..5001ba20140fa1634af972dc960776f979ea0753 100644 --- a/mace/ops/select.cc +++ b/mace/ops/select.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/core/tensor.h" namespace mace { @@ -204,7 +205,7 @@ class SelectOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterSelect(OpRegistryBase *op_registry) { +void RegisterSelect(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Select", SelectOp, DeviceType::CPU, float); } diff --git a/mace/ops/shape.cc b/mace/ops/shape.cc index dcca202f3229f616a3ce89dddcd008cf998a1a69..0071ec258cb260145625505a5a835011e1e65461 100644 --- a/mace/ops/shape.cc +++ b/mace/ops/shape.cc @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -58,7 +59,7 @@ class ShapeOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterShape(OpRegistryBase *op_registry) { +void RegisterShape(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Shape", ShapeOp, DeviceType::CPU, float); } diff --git a/mace/ops/slice.cc b/mace/ops/slice.cc index ac7ca64a9a700412a19a9600afaccdc2e56d81a8..14e71cad6ceb951f0cc6c6d3ba95ef81dd0fcea2 100644 --- a/mace/ops/slice.cc +++ b/mace/ops/slice.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -83,7 +84,7 @@ class SliceOp : public Operation { std::vector ends_; }; -void RegisterSlice(OpRegistryBase *op_registry) { +void RegisterSlice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Slice", SliceOp, DeviceType::CPU, float); } diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index cfbde681eaac06aff6a5d84a8d5cc7afc45021b0..bf7cf202c8cffe528bcae1a9064cca8e0d4d967b 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_QUANTIZE #include "mace/ops/fixpoint.h" @@ -520,7 +521,7 @@ class SoftmaxOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSoftmax(OpRegistryBase *op_registry) { +void RegisterSoftmax(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Softmax", SoftmaxOp, DeviceType::CPU, float); diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 156c2132289a487cb0db14d0bce05da85a31442d..e5d7ec5ca8ff5d33c215e913b4af4bd96b45cc71 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_batch.h" #endif // MACE_ENABLE_OPENCL @@ -328,7 +329,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { }; #endif // MACE_ENABLE_OPENCL -void RegisterSpaceToBatchND(OpRegistryBase *op_registry) { +void RegisterSpaceToBatchND(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SpaceToBatchND", SpaceToBatchNDOp, DeviceType::CPU, float); diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index 59c1a342162d0637f8e2d30b33c9b1835fac61f5..3653d09a9454057f2d2143774f4fa97ecc13167d 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -15,7 +15,8 @@ #include #include -#include 
"mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_depth.h" #endif // MACE_ENABLE_OPENCL @@ -180,7 +181,7 @@ class SpaceToDepthOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSpaceToDepth(OpRegistryBase *op_registry) { +void RegisterSpaceToDepth(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SpaceToDepth", SpaceToDepthOp, DeviceType::CPU, float); diff --git a/mace/ops/splice.cc b/mace/ops/splice.cc index 8f9198c00079f1c364bbc49b7b7c011cd384dd3d..af1536717ae66c3a1223c5bb7b4f346c7821cfd6 100644 --- a/mace/ops/splice.cc +++ b/mace/ops/splice.cc @@ -29,7 +29,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -153,7 +154,7 @@ class SpliceOp : public Operation { std::vector forward_const_indexes_; }; -void RegisterSplice(OpRegistryBase *op_registry) { +void RegisterSplice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Splice", SpliceOp, DeviceType::CPU, float); } diff --git a/mace/ops/split.cc b/mace/ops/split.cc index ffe7172f841bb76be8e4428cdf9a30ac29ee27bd..bb86aecbfc872e1d439b2aaa07bbbe93da81af7e 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/split.h" #endif // MACE_ENABLE_OPENCL @@ -128,7 +129,7 @@ class SplitOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSplit(OpRegistryBase *op_registry) { +void RegisterSplit(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Split", SplitOp, DeviceType::CPU, float); diff --git a/mace/ops/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc index 2d85ed98448ba37e60572df7f87c6184ebbeddfb..0e2b8d2bb891eceb5c46836af0e2e9b0bb81af15 100644 --- a/mace/ops/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/sqrdiff_mean.h" #endif // MACE_ENABLE_OPENCL @@ -100,7 +101,7 @@ class SqrDiffMeanOp : public Operation { }; #endif // MACE_ENABLE_OPENCL -void RegisterSqrDiffMean(OpRegistryBase *op_registry) { +void RegisterSqrDiffMean(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SqrDiffMean", SqrDiffMeanOp, DeviceType::CPU, float); diff --git a/mace/ops/squeeze.cc b/mace/ops/squeeze.cc index 0c08cfd589b6d5c5f080432bffb62162706f15bc..590479dd327f382286632bd27458135281e6aec7 100644 --- a/mace/ops/squeeze.cc +++ b/mace/ops/squeeze.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -77,7 +78,7 @@ class SqueezeOp : public SqueezeOpRaw { } }; -void RegisterSqueeze(OpRegistryBase *op_registry) { +void RegisterSqueeze(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, float); #ifdef MACE_ENABLE_QUANTIZE MACE_REGISTER_OP(op_registry, "Squeeze", SqueezeOp, DeviceType::CPU, uint8_t); diff --git a/mace/ops/stack.cc b/mace/ops/stack.cc index f49c401aebd19af8ca99681e710d8fa704dbf804..87cc51a0c0e89d9d8a6c48d715ce10d32a08061c 100644 --- a/mace/ops/stack.cc +++ 
b/mace/ops/stack.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -77,7 +78,7 @@ class StackOp : public Operation { int axis_; }; -void RegisterStack(OpRegistryBase *op_registry) { +void RegisterStack(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Stack", StackOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Stack", StackOp, DeviceType::CPU, int32_t); } diff --git a/mace/ops/strided_slice.cc b/mace/ops/strided_slice.cc index 4218d1f78614b487c85d4d645a09495b9c380a6b..bf44d5a162b19b1d813acc5c39ad9a1077622887 100644 --- a/mace/ops/strided_slice.cc +++ b/mace/ops/strided_slice.cc @@ -16,7 +16,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -350,7 +351,7 @@ class StridedSliceOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterStridedSlice(OpRegistryBase *op_registry) { +void RegisterStridedSlice(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "StridedSlice", StridedSliceOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "StridedSlice", StridedSliceOp, diff --git a/mace/ops/subsample.cc b/mace/ops/subsample.cc index 11835ac9987df4499d1686d0b03d547a3cbfd336..e3c2977e2e8b7f091c983d510faf1d51dea73a71 100644 --- a/mace/ops/subsample.cc +++ b/mace/ops/subsample.cc @@ -18,7 +18,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/math.h" namespace mace { @@ -100,7 +101,7 @@ class SubsampleOp : public Operation { std::vector forward_indexes_; }; -void RegisterSubsample(OpRegistryBase *op_registry) { +void RegisterSubsample(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Subsample", SubsampleOp, DeviceType::CPU, float); } diff --git a/mace/ops/sum_group.cc b/mace/ops/sum_group.cc index 1b62af7e7809c70b1931844e8b606fc322a4a83e..b8524a7480f3c5095e5bbf6d50ec92f3c26240ea 100644 --- a/mace/ops/sum_group.cc +++ b/mace/ops/sum_group.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -101,7 +102,7 @@ class SumGroupOp : public Operation { } }; -void RegisterSumGroup(OpRegistryBase *op_registry) { +void RegisterSumGroup(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "SumGroup", SumGroupOp, DeviceType::CPU, float); } diff --git a/mace/ops/target_rms_norm.cc b/mace/ops/target_rms_norm.cc index 23535e15804b476b4b979810f8a3f7663b96b266..e2b2fa2eb72177ae153c1b70f27fb333ebaee1af 100644 --- a/mace/ops/target_rms_norm.cc +++ b/mace/ops/target_rms_norm.cc @@ -22,7 +22,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -148,7 +149,7 @@ class TargetRMSNormOp : public Operation { int block_dim_; }; -void RegisterTargetRMSNorm(OpRegistryBase *op_registry) { +void RegisterTargetRMSNorm(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "TargetRMSNorm", TargetRMSNormOp, DeviceType::CPU, float); } diff --git a/mace/ops/tile.cc b/mace/ops/tile.cc index 36d0bfe9b826b51763bc62fb0758d1fa7e665f11..c09ca92104706649c525dc4a0bba258d5dbc1f0c 100644 --- a/mace/ops/tile.cc +++ b/mace/ops/tile.cc @@ -16,7 +16,8 @@ 
#include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/utils/memory.h" namespace mace { @@ -110,7 +111,7 @@ class TileOp : public Operation { int has_data_format_; }; -void RegisterTile(OpRegistryBase *op_registry) { +void RegisterTile(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Tile", TileOp, DeviceType::CPU, float); MACE_REGISTER_OP_CONDITION( op_registry, OpConditionBuilder("Tile").SetDevicePlacerFunc( diff --git a/mace/ops/transpose.cc b/mace/ops/transpose.cc index 4eb41e5b7b4a902d6cf930cec2e39b7616853f4c..a366f3d421cec6dbd7172dc25b18bd660165cb12 100644 --- a/mace/ops/transpose.cc +++ b/mace/ops/transpose.cc @@ -20,7 +20,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" #include "mace/ops/common/transpose.h" namespace mace { @@ -64,7 +65,7 @@ class TransposeOp : public Operation { std::vector dims_; }; -void RegisterTranspose(OpRegistryBase *op_registry) { +void RegisterTranspose(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Transpose", TransposeOp, DeviceType::CPU, float); } diff --git a/mace/ops/unsqueeze.cc b/mace/ops/unsqueeze.cc index 9fde2a91b946a0fbe8db29307615cfa0c735f189..cc28c14d8865f4bdcac79f6c5b8974f5530fba52 100644 --- a/mace/ops/unsqueeze.cc +++ b/mace/ops/unsqueeze.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -59,7 +60,7 @@ class UnsqueezeOp : public Operation { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -void RegisterUnsqueeze(OpRegistryBase *op_registry) { +void RegisterUnsqueeze(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Unsqueeze", UnsqueezeOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Unsqueeze", UnsqueezeOp, diff --git a/mace/ops/unstack.cc b/mace/ops/unstack.cc index b2a6eb6cee3b1adff4ecd7a40c3dcabb583e86ba..d0928614293dee689c77b607c57469c933c32b0a 100644 --- a/mace/ops/unstack.cc +++ b/mace/ops/unstack.cc @@ -15,7 +15,8 @@ #include #include -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" +#include "mace/core/registry/ops_registry.h" namespace mace { namespace ops { @@ -73,7 +74,7 @@ class UnstackOp : public Operation { int axis_; }; -void RegisterUnstack(OpRegistryBase *op_registry) { +void RegisterUnstack(OpRegistry *op_registry) { MACE_REGISTER_OP(op_registry, "Unstack", UnstackOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "Unstack", UnstackOp, diff --git a/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc b/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc index 2ac04e0c0a398e68e1b4cd4bab8b9b78db7a48ae..fc0e7ed71dfceec442360df57d45a8447ea2deb6 100644 --- a/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc +++ b/test/ccbenchmark/mace/ops/depthwise_deconv2d_benchmark.cc @@ -15,7 +15,7 @@ #include #include "mace/utils/statistics.h" -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccbenchmark/mace/ops/quantize_benchmark.cc b/test/ccbenchmark/mace/ops/quantize_benchmark.cc index 0923a29310b4483ee9abcd249194b1782213c37a..c43bcacb86489c34af42f505e0b3c2a89511395a 100644 --- a/test/ccbenchmark/mace/ops/quantize_benchmark.cc +++ b/test/ccbenchmark/mace/ops/quantize_benchmark.cc @@ -14,7 +14,7 @@ #ifdef MACE_ENABLE_QUANTIZE 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc b/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc index 05eaf21d11c3fb5dd36173f73f9ba1d70a892c62..791182e82eec7b8d9a3a2ceae9496809e872e252 100644 --- a/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc +++ b/test/ccbenchmark/mace/ops/sqrdiff_mean_benchmark.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/benchmark_utils/test_benchmark.h" #include "mace/ops/ops_test_util.h" diff --git a/test/ccunit/mace/ops/arm/fp32/gemm_test.cc b/test/ccunit/mace/ops/arm/fp32/gemm_test.cc index 805720331b193895301b40b408b4eac0b384104c..65a516f966326661da8f214de5803fe32e2402b0 100644 --- a/test/ccunit/mace/ops/arm/fp32/gemm_test.cc +++ b/test/ccunit/mace/ops/arm/fp32/gemm_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/ref/gemm.h" #include "mace/ops/testing/test_utils.h" @@ -50,7 +50,7 @@ void TestGemmFloat32(const index_t batch, GenerateRandomRealTypeData(rhs.shape(), rhs_data); GenerateRandomRealTypeData(output.shape(), output_data); } - ::mace::ops::arm::fp32::Gemm gemm; + ::mace::ops::arm::fp32::Gemm gemm((delegator::GemmParam())); utils::ThreadPool thread_pool(1, AFFINITY_NONE); thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); @@ -71,7 +71,7 @@ void TestGemmFloat32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_FLOAT); expected_output.Resize({batch, rows, cols}); - ::mace::ops::ref::Gemm gemm_ref; + ::mace::ops::ref::Gemm gemm_ref((delegator::GemmParam())); gemm_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/arm/fp32/gemv_test.cc b/test/ccunit/mace/ops/arm/fp32/gemv_test.cc index bc97bc3ee8ed9c52f62518830cba2b8775973702..3a224ea261c3782ec37336f309fddd9ef539f110 100644 --- a/test/ccunit/mace/ops/arm/fp32/gemv_test.cc +++ b/test/ccunit/mace/ops/arm/fp32/gemv_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include "mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/fp32/gemv.h" #include "mace/ops/ref/gemv.h" #include "mace/ops/testing/test_utils.h" @@ -53,7 +53,8 @@ void TestGemvFloat32(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - ::mace::ops::arm::fp32::Gemv gemv; + ::mace::ops::arm::fp32::Gemv gemv = + ::mace::ops::arm::fp32::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -67,7 +68,8 @@ void TestGemvFloat32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_FLOAT); expected_output.Resize({batch, height}); - ::mace::ops::ref::Gemv gemv_ref; + ::mace::ops::ref::Gemv gemv_ref = + ::mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/arm/q8/gemv_test.cc b/test/ccunit/mace/ops/arm/q8/gemv_test.cc index 6216cabaed02bbfc84ebc4b10adc0a012cdece3e..619d343fdd4ccf9ea051b22d0004cb3edc1a5352 100644 --- a/test/ccunit/mace/ops/arm/q8/gemv_test.cc +++ b/test/ccunit/mace/ops/arm/q8/gemv_test.cc @@ -15,8 +15,8 @@ #include +#include "mace/core/ops/op_context.h" #include 
"mace/core/tensor.h" -#include "mace/core/op_context.h" #include "mace/ops/arm/q8/gemv.h" #include "mace/ops/ref/gemv.h" #include "mace/ops/testing/test_utils.h" @@ -58,7 +58,8 @@ void TestGemvInt32(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - mace::ops::arm::q8::Gemv gemv; + mace::ops::arm::q8::Gemv gemv = + mace::ops::arm::q8::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -72,7 +73,8 @@ void TestGemvInt32(const index_t batch, Tensor expected_output(GetCPUAllocator(), DataType::DT_INT32); expected_output.Resize({batch, height}); - mace::ops::ref::Gemv gemv_ref; + mace::ops::ref::Gemv gemv_ref = + mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, @@ -130,7 +132,8 @@ void TestGemvUint8(const index_t batch, thread_pool.Init(); CPUDevice cpu_device(1, AFFINITY_NONE, &thread_pool); OpContext context(nullptr, &cpu_device); - mace::ops::arm::q8::Gemv gemv; + mace::ops::arm::q8::Gemv gemv = + mace::ops::arm::q8::Gemv(DelegatorParam()); gemv.Compute(&context, &lhs, &rhs, @@ -146,7 +149,8 @@ void TestGemvUint8(const index_t batch, expected_output.SetScale(0.6); expected_output.SetZeroPoint(57); expected_output.Resize({batch, height}); - mace::ops::ref::Gemv gemv_ref; + mace::ops::ref::Gemv gemv_ref = + mace::ops::ref::Gemv(DelegatorParam()); gemv_ref.Compute(nullptr, &lhs, &rhs, diff --git a/test/ccunit/mace/ops/matmul_test.cc b/test/ccunit/mace/ops/matmul_test.cc index d0432bb0b958ae6ee452b976b5c403e4bb4c04ba..9d46f0e1d97391e6dbf539f0cbee21b29918a1fc 100644 --- a/test/ccunit/mace/ops/matmul_test.cc +++ b/test/ccunit/mace/ops/matmul_test.cc @@ -14,6 +14,7 @@ #include +#include "mace/ops/delegator/gemm.h" #include "mace/ops/ops_test_util.h" #include "mace/ops/ref/gemm.h" @@ -111,7 +112,7 @@ void Complex(const std::vector &batch, .Finalize(net.NewOperatorDef()); net.RunOp(CPU); - ref::Gemm gemm; + ref::Gemm gemm = ref::Gemm(delegator::GemmParam()); Tensor expected_output_tensor; std::vector expected_output_shape({rows, cols}); expected_output_shape.insert(expected_output_shape.begin(), diff --git a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc index 3dfe468a8db889418c48a15776e79adccadf9319..808ea9aa7f89905a890dbb67493a1f9c3922269c 100644 --- a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc +++ b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc @@ -16,7 +16,7 @@ #include #include "gtest/gtest.h" -#include "mace/core/op_context.h" +#include "mace/core/ops/op_context.h" #include "mace/core/runtime/opencl/gpu_device.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" @@ -134,7 +134,7 @@ TEST(OutOfRangeCheckTest, RandomTest) { std::unique_ptr device = make_unique( gpu_context.opencl_tuner()); - Workspace ws; + Workspace ws(nullptr); OpContext context(&ws, device.get()); std::vector buffer_shape = {batch, height, width, channels}; diff --git a/test/ccunit/mace/ops/sqrdiff_mean_test.cc b/test/ccunit/mace/ops/sqrdiff_mean_test.cc index 3257987c7b9d8dc65a218059cd5c44ae9ab2e55d..42375b7df4e32a0ab55ce06730db5f1bf9280a03 100644 --- a/test/ccunit/mace/ops/sqrdiff_mean_test.cc +++ b/test/ccunit/mace/ops/sqrdiff_mean_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/core/operator.h" +#include "mace/core/ops/operator.h" #include "mace/ops/ops_test_util.h" namespace mace { diff --git a/test/ccutils/mace/ops/ops_test_util.h b/test/ccutils/mace/ops/ops_test_util.h index a9f8a9842890c4d9737040c89bd8d68c4fb5d7a1..e1e563426ded80603dff60b6415a9eb81ad62fcb 100644 --- a/test/ccutils/mace/ops/ops_test_util.h +++ b/test/ccutils/mace/ops/ops_test_util.h @@ -31,7 +31,9 @@ #include "mace/core/device_context.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/ops/registry/ops_registry.h" +#include "mace/core/registry/ops_registry.h" +#include "mace/core/registry/op_delegator_registry.h" +#include "mace/ops/registry/registry.h" #include "mace/public/mace.h" #include "mace/utils/memory.h" #include "mace/utils/math.h" @@ -109,7 +111,12 @@ class OpTestContext { class OpsTestNet { public: OpsTestNet() : - op_registry_(make_unique()) {} + op_registry_(make_unique()), + op_delegator_registry_(make_unique()), + ws_(op_delegator_registry_.get()) { + ops::RegisterAllOps(op_registry_.get()); + ops::RegisterAllOpDelegators(op_delegator_registry_.get()); + } template void AddInputFromArray(const std::string &name, @@ -426,7 +433,8 @@ class OpsTestNet { void Sync(); public: - std::shared_ptr op_registry_; + std::unique_ptr op_registry_; + std::unique_ptr op_delegator_registry_; Workspace ws_; std::vector op_defs_; std::unique_ptr net_;