Commit 2895c3ee authored by luxuhui

refactor: refactor op base module and op delegator mechanism

N/A
Signed-off-by: Luxuhui <luxuhui@xiaomi.com>
Parent 28954099
......@@ -19,7 +19,7 @@ Define the new Op class in `mace/ops/my_custom_op.cc`.
The structure of Op is like the following code.
```c++
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
namespace mace {
namespace ops {
......@@ -39,7 +39,7 @@ class MyCustomOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterMyCustomOp(OpRegistryBase *op_registry) {
void RegisterMyCustomOp(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
DeviceType::CPU, float);
......@@ -63,14 +63,14 @@ namespace ops {
...
extern void RegisterMyCustomOp(OpRegistryBase *op_registry);
extern void RegisterMyCustomOp(OpRegistry *op_registry);
...
} // namespace ops
OpRegistry::OpRegistry() : OpRegistryBase() {
OpRegistry::OpRegistry() {
// Keep in lexicographical order
...
......
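For orientation, the fragments above can be pieced together into a complete, CPU-only skeleton of a custom op against the refactored headers. This is a hedged sketch rather than code from this commit: the `Run` body and the `ResizeLike`/`MACE_RETURN_IF_ERROR` helpers are assumptions, and `MACE_REGISTER_OP` is assumed to be carried over into the new `mace/core/registry/ops_registry.h` (whose tail is truncated in this diff).
```c++
// Hypothetical end-to-end skeleton for mace/ops/my_custom_op.cc (CPU, float).
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"

namespace mace {
namespace ops {

template<DeviceType D, class T>
class MyCustomOp;

template<>
class MyCustomOp<DeviceType::CPU, float> : public Operation {
 public:
  explicit MyCustomOp(OpConstructContext *context) : Operation(context) {}

  MaceStatus Run(OpContext *context) override {
    (void)context;  // unused in this sketch
    const Tensor *input = this->Input(0);
    Tensor *output = this->Output(0);
    // Assumed helpers: Tensor::ResizeLike and MACE_RETURN_IF_ERROR.
    MACE_RETURN_IF_ERROR(output->ResizeLike(input));
    // ... kernel computation goes here ...
    return MaceStatus::MACE_SUCCESS;
  }
};

void RegisterMyCustomOp(OpRegistry *op_registry) {
  MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
                   DeviceType::CPU, float);
}

}  // namespace ops
}  // namespace mace
```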
......@@ -557,7 +557,7 @@ which will reduce the library size significantly. the final binary just link the
} // namespace ops
OpRegistry::OpRegistry() : OpRegistryBase() {
OpRegistry::OpRegistry() {
// Just leave the ops used in your models
...
......
......@@ -370,12 +370,13 @@ the sample code show how to calculate the Top-1 accuracy with imagenet validatio
Reduce Library Size
-------------------
Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
which will reduce the library size significantly. the final binary just link the registered ops' code.
Remove the registration of the ops and delegators unused by your models in
``mace/ops/registry/ops_registry.cc`` and ``mace/ops/registry/op_delegators_registry.cc``,
which will reduce the library size significantly. The final binary only links the code of the registered ops and delegators.
.. code-block:: cpp
#include "mace/ops/ops_register.h"
#include "mace/ops/registry/registry.h"
namespace mace {
namespace ops {
......@@ -386,12 +387,38 @@ which will reduce the library size significantly. the final binary just link the
} // namespace ops
OpRegistry::OpRegistry() : OpRegistryBase() {
void RegisterAllOps(OpRegistry *registry) {
// Just leave the ops used in your models
...
ops::RegisterMyCustomOp(this);
ops::RegisterMyCustomOp(registry);
...
}
} // namespace mace
.. code-block:: cpp
#include "mace/ops/registry/registry.h"
namespace mace {
namespace ops {
// Just leave the delegators used in your ops
...
} // namespace ops
void RegisterAllOpDelegators(OpDelegatorRegistry *registry) {
// Just leave the delegators used in your ops
...
ops::RegisterMyCustomDelegator(registry);
...
......
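A registration call such as ``ops::RegisterMyCustomDelegator`` above is normally generated by the ``MACE_REGISTER_DELEGATOR`` macro introduced later in this commit (in ``mace/core/registry/op_delegator_registry.h``). A hedged sketch with a hypothetical delegator class named ``MyCustom`` (the macro derives the function name ``RegisterMyCustomDelegator`` from it):

.. code-block:: cpp

    #include "mace/core/ops/op_delegator.h"
    #include "mace/core/registry/op_delegator_registry.h"

    namespace mace {
    namespace ops {

    // Hypothetical delegator; real delegators also expose compute methods.
    class MyCustom : public OpDelegator {
     public:
      explicit MyCustom(const DelegatorParam &param) : OpDelegator(param) {}
    };

    // Expands to:
    //   void RegisterMyCustomDelegator(OpDelegatorRegistry *registry) { ... }
    MACE_REGISTER_DELEGATOR(
        registry, MyCustom, DelegatorParam,
        MACE_DELEGATOR_KEY(MyCustom, DeviceType::CPU, float, MACE_CPU_IMPL_TYPE))

    }  // namespace ops
    }  // namespace mace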
......@@ -26,6 +26,8 @@ cc_library(
srcs = glob(
[
"*.cc",
"ops/*.cc",
"registry/*.cc",
"runtime/cpu/*.cc",
],
exclude = [
......@@ -53,6 +55,8 @@ cc_library(
hdrs = glob(
[
"*.h",
"ops/*.h",
"registry/*.h",
"runtime/cpu/*.h",
],
exclude = [
......@@ -68,7 +72,7 @@ cc_library(
])) + if_hta_enabled(glob([
"runtime/hexagon/*hta*.h",
])) + if_apu_enabled(glob([
"runtime/apu/*.h"
"runtime/apu/*.h",
])) + if_rpcmem_enabled([
"rpcmem.h",
]),
......
......@@ -8,9 +8,16 @@ set(CORE_SRCS
net.cc
net_def_adapter.cc
net_optimizer.cc
op_context.cc
operator.cc
ops/op_condition_builder.cc
ops/op_condition_context.cc
ops/op_construct_context.cc
ops/op_context.cc
ops/operator.cc
ops/op_init_context.cc
quantize.cc
registry/op_delegator_registry.cc
registry/op_registration_info.cc
registry/ops_registry.cc
runtime_failure_mock.cc
types.cc
workspace.cc
......
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/net.h"
#include <algorithm>
#include <limits>
#include <set>
......@@ -20,8 +22,9 @@
#include "mace/core/future.h"
#include "mace/core/memory_optimizer.h"
#include "mace/core/net.h"
#include "mace/core/op_context.h"
#include "mace/core/ops/op_init_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/public/mace.h"
#include "mace/port/env.h"
#include "mace/utils/conf_util.h"
......@@ -33,7 +36,7 @@
namespace mace {
SerialNet::SerialNet(const OpRegistryBase *op_registry,
SerialNet::SerialNet(const OpRegistry *op_registry,
const NetDef *net_def,
Workspace *ws,
Device *target_device,
......
......@@ -21,13 +21,14 @@
#include <unordered_map>
#include <sstream>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
namespace mace {
class RunMetadata;
class Workspace;
class MemoryOptimizer;
class OpRegistry;
class NetBase {
public:
......@@ -44,7 +45,7 @@ class NetBase {
class SerialNet : public NetBase {
public:
SerialNet(const OpRegistryBase *op_registry,
SerialNet(const OpRegistry *op_registry,
const NetDef *net_def,
Workspace *ws,
Device *target_device,
......
......@@ -17,7 +17,9 @@
#include <string>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/ops/op_condition_context.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/utils/math.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_util.h"
......@@ -82,7 +84,7 @@ void BuildTransposeOpDef(
} // namespace
NetDefAdapter::NetDefAdapter(const OpRegistryBase *op_registry,
NetDefAdapter::NetDefAdapter(const OpRegistry *op_registry,
const Workspace *ws)
: op_registry_(op_registry), ws_(ws) {}
......
......@@ -23,14 +23,17 @@
#include "mace/core/types.h"
#include "mace/proto/mace.pb.h"
#include "mace/port/port.h"
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/net_optimizer.h"
namespace mace {
class OpRegistryBase;
class Workspace;
class Device;
class OpConditionContext;
class OperatorDef;
class OpRegistry;
class Workspace;
///////////////////////////////////////////////////////////////////////////////
/// Conventions
......@@ -49,7 +52,7 @@ class Device;
///////////////////////////////////////////////////////////////////////////////
class NetDefAdapter {
public:
NetDefAdapter(const OpRegistryBase *op_registry,
NetDefAdapter(const OpRegistry *op_registry,
const Workspace *ws);
// Adapt original net_def to a better net.
// 1. Adapt device: choose best device for every op in the net.
......@@ -122,7 +125,7 @@ class NetDefAdapter {
std::string DebugString(const NetDef *net_def);
private:
const OpRegistryBase *op_registry_;
const OpRegistry *op_registry_;
const Workspace *ws_;
NetOptimizer net_optimizer_;
};
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OPERATOR_H_
#define MACE_CORE_OPERATOR_H_
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/core/arg_helper.h"
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_util.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
// OpConditionContext has all information used for choosing proper Op
class OpConditionContext {
public:
typedef std::unordered_map<std::string, std::vector<index_t>> TensorShapeMap;
OpConditionContext(const Workspace *ws, TensorShapeMap *info);
~OpConditionContext() = default;
void set_operator_def(const OperatorDef *operator_def);
inline const OperatorDef *operator_def() const {
return operator_def_;
}
inline const Workspace *workspace() const {
return ws_;
}
inline void set_device(Device *device) {
device_ = device;
}
inline Device *device() const {
return device_;
}
inline TensorShapeMap *tensor_shape_info() const {
return tensor_shape_info_;
}
void set_output_mem_type(MemoryType type);
inline MemoryType output_mem_type() const {
return output_mem_type_;
}
void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt);
MemoryType GetInputMemType(size_t idx) const;
DataType GetInputDataType(size_t idx) const;
#ifdef MACE_ENABLE_OPENCL
void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type);
OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const;
#endif // MACE_ENABLE_OPENCL
private:
const OperatorDef *operator_def_;
const Workspace *ws_;
Device *device_;
TensorShapeMap *tensor_shape_info_;
// used for memory transform
std::vector<MemoryType> input_mem_types_;
std::vector<DataType> input_data_types_;
MemoryType output_mem_type_; // there is only one output memory type now.
#ifdef MACE_ENABLE_OPENCL
std::vector<OpenCLBufferType> input_opencl_buffer_types_;
#endif // MACE_ENABLE_OPENCL
};
// memory_optimizer, device
class OpConstructContext {
typedef std::unordered_map<std::string, std::vector<index_t>> TensorShapeMap;
public:
explicit OpConstructContext(Workspace *ws);
~OpConstructContext() = default;
void set_operator_def(std::shared_ptr<OperatorDef> operator_def);
inline std::shared_ptr<OperatorDef> operator_def() const {
return operator_def_;
}
inline Workspace *workspace() const {
return ws_;
}
inline void set_device(Device *device) {
device_ = device;
}
inline Device *device() const {
return device_;
}
#ifdef MACE_ENABLE_OPENCL
inline MemoryType GetOpMemoryType() const {
return static_cast<MemoryType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, OutputMemoryTypeTagName(),
static_cast<int>(MemoryType::CPU_BUFFER)));
}
#endif // MACE_ENABLE_OPENCL
private:
std::shared_ptr<OperatorDef> operator_def_;
Workspace *ws_;
Device *device_;
};
// memory_optimizer, device
class OpInitContext {
public:
explicit OpInitContext(Workspace *ws, Device *device = nullptr);
~OpInitContext() = default;
inline Workspace *workspace() const {
return ws_;
}
inline void set_device(Device *device) {
device_ = device;
}
inline Device *device() const {
return device_;
}
private:
Workspace *ws_;
Device *device_;
};
// Conventions
// * If there exist format, NHWC is the default format
// * The input/output format of CPU ops with float data type is NCHW
// * The input/output format of GPU ops and CPU Quantization ops is NHWC
// * Inputs' data type is same as the operation data type by default.
// * The outputs' data type is same as the operation data type by default.
class Operation {
public:
explicit Operation(OpConstructContext *context);
virtual ~Operation() = default;
template<typename T>
inline T GetOptionalArg(const std::string &name,
const T &default_value) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ProtoArgHelper::GetOptionalArg<OperatorDef, T>(
*operator_def_, name, default_value);
}
template<typename T>
inline std::vector<T> GetRepeatedArgs(
const std::string &name, const std::vector<T> &default_value = {}) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ProtoArgHelper::GetRepeatedArgs<OperatorDef, T>(
*operator_def_, name, default_value);
}
inline DeviceType device_type() const {
return static_cast<DeviceType>(operator_def_->device_type());
}
inline const Tensor *Input(unsigned int idx) {
MACE_CHECK(idx < inputs_.size());
return inputs_[idx];
}
inline Tensor *Output(int idx) { return outputs_[idx]; }
inline int InputSize() { return inputs_.size(); }
inline int OutputSize() { return outputs_.size(); }
inline const std::vector<const Tensor *> &Inputs() const { return inputs_; }
inline const std::vector<Tensor *> &Outputs() { return outputs_; }
// Run Op asynchronously (depends on device), return a future if not nullptr.
virtual MaceStatus Init(OpInitContext *);
virtual MaceStatus Run(OpContext *) = 0;
inline const OperatorDef &debug_def() const {
MACE_CHECK(has_debug_def(), "operator_def was null!");
return *operator_def_;
}
inline void set_debug_def(
const std::shared_ptr<OperatorDef> &operator_def) {
operator_def_ = operator_def;
}
inline bool has_debug_def() const { return operator_def_ != nullptr; }
inline std::shared_ptr<OperatorDef> operator_def() {
return operator_def_;
}
protected:
std::shared_ptr<OperatorDef> operator_def_;
std::vector<const Tensor *> inputs_;
std::vector<Tensor *> outputs_;
MACE_DISABLE_COPY_AND_ASSIGN(Operation);
};
// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the
// indices of the operator's inputs and outputs, in order to avoid confusion.
// For example, for a fully convolution layer that has input, weight and bias,
// you can define its input tags as:
// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
// And in the code, instead of doing
// auto& weight = Input(1);
// you can now do
// auto& weight = Input(WEIGHT);
// to make it more clear.
#define MACE_OP_INPUT_TAGS(first_input, ...) \
enum _InputTags { first_input = 0, __VA_ARGS__ }
#define MACE_OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
struct OpRegistrationInfo {
public:
typedef std::function<std::unique_ptr<Operation>(OpConstructContext *)>
OpCreator;
typedef std::function<std::set<DeviceType>(OpConditionContext *)>
DevicePlacer;
typedef std::function<void(OpConditionContext *)> MemoryTypeSetter;
typedef std::function<std::vector<DataFormat>(OpConditionContext *)>
DataFormatSelector;
OpRegistrationInfo();
void AddDevice(DeviceType);
void Register(const std::string &key, OpCreator creator);
std::set<DeviceType> devices;
std::unordered_map<std::string, OpCreator> creators;
DevicePlacer device_placer;
MemoryTypeSetter memory_type_setter;
DataFormatSelector data_format_selector;
};
class OpConditionBuilder {
public:
explicit OpConditionBuilder(const std::string &type);
const std::string type() const;
OpConditionBuilder &SetDevicePlacerFunc(
OpRegistrationInfo::DevicePlacer placer);
// If you set input memory type for specified Op,
// you must call OpConditionContext::set_output_mem_type
OpConditionBuilder &SetInputMemoryTypeSetter(
OpRegistrationInfo::MemoryTypeSetter setter);
OpConditionBuilder &SetInputsDataFormatSelector(
OpRegistrationInfo::DataFormatSelector selector);
void Finalize(OpRegistrationInfo *info) const;
private:
std::string type_;
OpRegistrationInfo::DevicePlacer placer_;
OpRegistrationInfo::MemoryTypeSetter memory_type_setter_;
OpRegistrationInfo::DataFormatSelector data_format_selector_;
};
class OpRegistryBase {
public:
OpRegistryBase() = default;
virtual ~OpRegistryBase() = default;
MaceStatus Register(const std::string &op_type,
const DeviceType device_type,
const DataType dt,
OpRegistrationInfo::OpCreator creator);
MaceStatus Register(const OpConditionBuilder &builder);
const std::set<DeviceType> AvailableDevices(
const std::string &op_type, OpConditionContext *context) const;
void GetInOutMemoryTypes(
const std::string &op_type, OpConditionContext *context) const;
const std::vector<DataFormat> InputsDataFormat(
const std::string &op_type, OpConditionContext *context) const;
std::unique_ptr<Operation> CreateOperation(
OpConstructContext *context,
DeviceType device_type) const;
template<class DerivedType>
static std::unique_ptr<Operation> DefaultCreator(
OpConstructContext *context) {
return std::unique_ptr<Operation>(new DerivedType(context));
}
private:
std::unordered_map<
std::string,
std::unique_ptr<OpRegistrationInfo>> registry_;
MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase);
};
#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \
op_registry->Register(op_type, \
device, \
DataTypeToEnum<dt>::value, \
OpRegistryBase::DefaultCreator<class_name<device, dt>>)
#define MACE_REGISTER_OP_BY_CLASS( \
op_registry, op_type, class_name, device, dt) \
op_registry->Register(op_type, \
device, \
DataTypeToEnum<dt>::value, \
OpRegistryBase::DefaultCreator<class_name>)
#ifdef MACE_ENABLE_OPENCL
#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \
op_registry->Register( \
op_type, \
DeviceType::GPU, \
DT_FLOAT, \
OpRegistryBase::DefaultCreator<class_name<DeviceType::GPU, float>>)
#else
#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name)
#endif
#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \
op_registry->Register(builder)
} // namespace mace
#endif // MACE_CORE_OPERATOR_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,39 +12,48 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_GENERAL_H_
#define MACE_OPS_ARM_FP32_CONV_GENERAL_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/core/ops/op_condition_builder.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dGeneral : public Conv2dBase {
public:
Conv2dGeneral(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type)
: Conv2dBase(strides, dilations, paddings, padding_type) {}
virtual ~Conv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_GENERAL_H_
OpConditionBuilder::OpConditionBuilder(const std::string &type)
: type_(type) {}
const std::string OpConditionBuilder::type() const {
return type_;
}
OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc(
OpRegistrationInfo::DevicePlacer placer) {
placer_ = placer;
return *this;
}
OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter(
OpRegistrationInfo::MemoryTypeSetter setter) {
memory_type_setter_ = setter;
return *this;
}
OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector(
OpRegistrationInfo::DataFormatSelector selector) {
data_format_selector_ = selector;
return *this;
}
void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const {
if (info != nullptr) {
if (placer_) {
info->device_placer = placer_;
}
if (memory_type_setter_) {
info->memory_type_setter = memory_type_setter_;
}
if (data_format_selector_) {
info->data_format_selector = data_format_selector_;
}
}
}
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OPS_OP_CONDITION_BUILDER_H_
#define MACE_CORE_OPS_OP_CONDITION_BUILDER_H_
#include <memory>
#include <string>
#include "mace/core/registry/op_registration_info.h"
#include "mace/core/types.h"
namespace mace {
class OpConditionBuilder {
public:
explicit OpConditionBuilder(const std::string &type);
const std::string type() const;
OpConditionBuilder &SetDevicePlacerFunc(
OpRegistrationInfo::DevicePlacer placer);
// If you set input memory type for specified Op,
// you must call OpConditionContext::set_output_mem_type
OpConditionBuilder &SetInputMemoryTypeSetter(
OpRegistrationInfo::MemoryTypeSetter setter);
OpConditionBuilder &SetInputsDataFormatSelector(
OpRegistrationInfo::DataFormatSelector selector);
void Finalize(OpRegistrationInfo *info) const;
private:
std::string type_;
OpRegistrationInfo::DevicePlacer placer_;
OpRegistrationInfo::MemoryTypeSetter memory_type_setter_;
OpRegistrationInfo::DataFormatSelector data_format_selector_;
};
} // namespace mace
#endif // MACE_CORE_OPS_OP_CONDITION_BUILDER_H_
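A usage sketch (not part of this commit): an op can attach a custom device placer by building an `OpConditionBuilder` and handing it to the `OpRegistry::Register(const OpConditionBuilder &)` overload shown further down in this diff. The op name and the shape-based CPU fallback below are hypothetical.
```c++
#include <set>

#include "mace/core/ops/op_condition_builder.h"
#include "mace/core/ops/op_condition_context.h"
#include "mace/core/registry/ops_registry.h"

namespace mace {
namespace ops {

// Hypothetical condition registration for "MyCustomOp".
void RegisterMyCustomOpCondition(OpRegistry *op_registry) {
  op_registry->Register(
      OpConditionBuilder("MyCustomOp")
          .SetDevicePlacerFunc(
              [](OpConditionContext *context) -> std::set<DeviceType> {
                // Fall back to CPU when no tensor shape info is available.
                auto *shapes = context->tensor_shape_info();
                if (shapes == nullptr || shapes->empty()) {
                  return {DeviceType::CPU};
                }
                return {DeviceType::CPU, DeviceType::GPU};
              }));
}

}  // namespace ops
}  // namespace mace
```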
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/ops/op_condition_context.h"
#include "mace/core/arg_helper.h"
#include "mace/proto/mace.pb.h"
#include "mace/utils/logging.h"
namespace mace {
OpConditionContext::OpConditionContext(
const Workspace *ws,
OpConditionContext::TensorShapeMap *info)
: operator_def_(nullptr),
ws_(ws),
device_(nullptr),
tensor_shape_info_(info) {}
void OpConditionContext::set_operator_def(
const OperatorDef *operator_def) {
operator_def_ = operator_def;
input_data_types_.clear();
}
void OpConditionContext::SetInputInfo(size_t idx,
MemoryType mem_type,
DataType dt) {
if (input_mem_types_.empty()) {
// the default inputs' memory types are same as output memory type.
input_mem_types_.resize(operator_def_->input_size(), output_mem_type_);
}
if (input_data_types_.empty()) {
// the default inputs' data types are same as operation's data type.
DataType op_dt = static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DataType::DT_FLOAT)));
input_data_types_.resize(operator_def_->input_size(), op_dt);
}
MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size());
input_mem_types_[idx] = mem_type;
input_data_types_[idx] = dt;
}
void OpConditionContext::set_output_mem_type(MemoryType type) {
MACE_CHECK(operator_def_ != nullptr);
output_mem_type_ = type;
input_mem_types_.clear();
}
MemoryType OpConditionContext::GetInputMemType(size_t idx) const {
if (input_mem_types_.empty()) {
return output_mem_type_;
}
MACE_CHECK(idx < input_mem_types_.size(),
idx, " < ", input_mem_types_.size());
return input_mem_types_[idx];
}
DataType OpConditionContext::GetInputDataType(size_t idx) const {
if (input_data_types_.empty()) {
// the default inputs' data types are same as operation's data type.
return static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DataType::DT_FLOAT)));
}
MACE_CHECK(idx < input_data_types_.size());
return input_data_types_[idx];
}
#ifdef MACE_ENABLE_OPENCL
void OpConditionContext::SetInputOpenCLBufferType(
size_t idx, OpenCLBufferType buffer_type) {
if (input_opencl_buffer_types_.empty()) {
// the default inputs' memory types are same as output memory type.
input_opencl_buffer_types_.resize(operator_def_->input_size(),
OpenCLBufferType::IN_OUT_CHANNEL);
}
MACE_CHECK(idx < input_opencl_buffer_types_.size());
input_opencl_buffer_types_[idx] = buffer_type;
}
OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType(
size_t idx) const {
if (input_opencl_buffer_types_.empty()) {
return OpenCLBufferType::IN_OUT_CHANNEL;
}
MACE_CHECK(idx < input_opencl_buffer_types_.size());
return input_opencl_buffer_types_[idx];
}
#endif // MACE_ENABLE_OPENCL
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_
#define MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/core/types.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_util.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
class Workspace;
class Device;
// OpConditionContext has all information used for choosing proper Op
class OpConditionContext {
public:
typedef std::unordered_map<std::string, std::vector<index_t>> TensorShapeMap;
OpConditionContext(const Workspace *ws, TensorShapeMap *info);
~OpConditionContext() = default;
void set_operator_def(const OperatorDef *operator_def);
const OperatorDef *operator_def() const {
return operator_def_;
}
const Workspace *workspace() const {
return ws_;
}
void set_device(Device *device) {
device_ = device;
}
Device *device() const {
return device_;
}
TensorShapeMap *tensor_shape_info() const {
return tensor_shape_info_;
}
void set_output_mem_type(MemoryType type);
MemoryType output_mem_type() const {
return output_mem_type_;
}
void SetInputInfo(size_t idx, MemoryType mem_type, DataType dt);
MemoryType GetInputMemType(size_t idx) const;
DataType GetInputDataType(size_t idx) const;
#ifdef MACE_ENABLE_OPENCL
void SetInputOpenCLBufferType(size_t idx, OpenCLBufferType buffer_type);
OpenCLBufferType GetInputOpenCLBufferType(size_t idx) const;
#endif // MACE_ENABLE_OPENCL
private:
const OperatorDef *operator_def_;
const Workspace *ws_;
Device *device_;
TensorShapeMap *tensor_shape_info_;
// used for memory transform
std::vector<MemoryType> input_mem_types_;
std::vector<DataType> input_data_types_;
MemoryType output_mem_type_; // there is only one output memory type now.
#ifdef MACE_ENABLE_OPENCL
std::vector<OpenCLBufferType> input_opencl_buffer_types_;
#endif // MACE_ENABLE_OPENCL
};
} // namespace mace
#endif // MACE_CORE_OPS_OP_CONDITION_CONTEXT_H_
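One contract worth spelling out (it follows from `set_output_mem_type()` clearing the per-input memory types and `SetInputInfo()` seeding its defaults from `output_mem_type_`): a memory type setter should set the output memory type before describing the inputs. A hedged, CPU-only sketch of such a setter, with a hypothetical function name, intended for `OpConditionBuilder::SetInputMemoryTypeSetter`:
```c++
#include "mace/core/ops/op_condition_context.h"
#include "mace/proto/mace.pb.h"  // OperatorDef, DataType

namespace mace {
namespace ops {

// Hypothetical MemoryTypeSetter: pin an op to CPU buffers with float inputs.
void MyCustomOpMemoryTypeSetter(OpConditionContext *context) {
  // Set the (single) output memory type first; this clears any previously
  // recorded per-input memory types.
  context->set_output_mem_type(MemoryType::CPU_BUFFER);
  const int num_inputs = context->operator_def()->input_size();
  for (int i = 0; i < num_inputs; ++i) {
    context->SetInputInfo(static_cast<size_t>(i),
                          MemoryType::CPU_BUFFER, DataType::DT_FLOAT);
  }
}

}  // namespace ops
}  // namespace mace
```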
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/ops/op_construct_context.h"
namespace mace {
OpConstructContext::OpConstructContext(Workspace *ws)
: operator_def_(nullptr),
ws_(ws),
device_(nullptr) {}
void OpConstructContext::set_operator_def(
std::shared_ptr<OperatorDef> operator_def) {
operator_def_ = operator_def;
}
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,49 +12,62 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_
#ifndef MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_
#define MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_
#include <vector>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/arg_helper.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/proto/mace.pb.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Device;
class Workspace;
// memory_optimizer, device
class OpConstructContext {
typedef std::unordered_map<std::string, std::vector<index_t>> TensorShapeMap;
class Deconv2dGeneral : public Deconv2dBase {
public:
Deconv2dGeneral(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase(strides,
dilations,
paddings,
padding_type,
framework_type) {}
virtual ~Deconv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
explicit OpConstructContext(Workspace *ws);
~OpConstructContext() = default;
void set_operator_def(std::shared_ptr<OperatorDef> operator_def);
std::shared_ptr<OperatorDef> operator_def() const {
return operator_def_;
}
Workspace *workspace() const {
return ws_;
}
void set_device(Device *device) {
device_ = device;
}
Device *device() const {
return device_;
}
#ifdef MACE_ENABLE_OPENCL
inline MemoryType GetOpMemoryType() const {
return static_cast<MemoryType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, OutputMemoryTypeTagName(),
static_cast<int>(MemoryType::CPU_BUFFER)));
}
#endif // MACE_ENABLE_OPENCL
private:
std::shared_ptr<OperatorDef> operator_def_;
Workspace *ws_;
Device *device_;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_GENERAL_H_
#endif // MACE_CORE_OPS_OP_CONSTRUCT_CONTEXT_H_
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/op_context.h"
#include "mace/core/ops/op_context.h"
namespace mace {
......
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OP_CONTEXT_H_
#define MACE_CORE_OP_CONTEXT_H_
#ifndef MACE_CORE_OPS_OP_CONTEXT_H_
#define MACE_CORE_OPS_OP_CONTEXT_H_
#include "mace/core/device.h"
#include "mace/core/workspace.h"
......@@ -35,8 +35,7 @@ class OpContext {
Device *device_;
Workspace *ws_;
StatsFuture *future_;
// metadata
};
} // namespace mace
#endif // MACE_CORE_OP_CONTEXT_H_
#endif // MACE_CORE_OPS_OP_CONTEXT_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,40 +12,47 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_REF_ACTIVATION_H_
#define MACE_OPS_REF_ACTIVATION_H_
#ifndef MACE_CORE_OPS_OP_DELEGATOR_H_
#define MACE_CORE_OPS_OP_DELEGATOR_H_
#include "mace/core/op_context.h"
#include "mace/ops/common/activation_type.h"
#include <memory>
#include "mace/utils/macros.h"
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
namespace ref {
class Activation {
enum ImplType {
REF = 0,
NEON,
};
#ifdef MACE_ENABLE_NEON
#define MACE_CPU_IMPL_TYPE NEON
#else
#define MACE_CPU_IMPL_TYPE REF
#endif
struct DelegatorParam {
public:
DelegatorParam() = default;
virtual ~DelegatorParam() = default;
};
class OpDelegator {
public:
explicit Activation(ActivationType type,
const float limit,
const float leakyrelu_coefficient);
~Activation() = default;
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
Tensor *output);
private:
void DoActivation(const OpContext *context,
const Tensor *input,
Tensor *output);
ActivationType type_;
const float limit_;
const float leakyrelu_coefficient_;
explicit OpDelegator(const DelegatorParam &param) {
MACE_UNUSED(param);
}
virtual ~OpDelegator() = default;
template<class DerivedType, class ParamType>
static std::unique_ptr<OpDelegator> DefaultCreator(
const DelegatorParam &param) {
return make_unique<DerivedType>(static_cast<const ParamType &>(param));
}
};
} // namespace ref
} // namespace ops
} // namespace mace
#endif // MACE_OPS_REF_ACTIVATION_H_
#endif // MACE_CORE_OPS_OP_DELEGATOR_H_
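To make the intended use of `DelegatorParam` and `OpDelegator::DefaultCreator` concrete, here is a sketch with hypothetical names (not code from this commit): each delegator ships a param struct derived from `DelegatorParam`, and `DefaultCreator` static_casts the base reference back to that concrete param type, so a registered creator must be paired with the param type its delegator's constructor expects.
```c++
#include "mace/core/ops/op_delegator.h"

namespace mace {
namespace ops {

// Hypothetical parameter bundle for a tiling-based delegator.
struct MyTileParam : public DelegatorParam {
  explicit MyTileParam(int tile_size) : tile_size_(tile_size) {}
  int tile_size_;
};

// Hypothetical delegator consuming that param type.
class MyTileDelegator : public OpDelegator {
 public:
  explicit MyTileDelegator(const MyTileParam &param)
      : OpDelegator(param), tile_size_(param.tile_size_) {}

 private:
  int tile_size_;
};

// The type-erased creator a registry would store for this pair; DefaultCreator
// downcasts the incoming DelegatorParam to MyTileParam before construction.
auto my_tile_creator =
    OpDelegator::DefaultCreator<MyTileDelegator, MyTileParam>;

}  // namespace ops
}  // namespace mace
```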
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/ops/op_init_context.h"
namespace mace {
OpInitContext::OpInitContext(Workspace *ws, Device *device)
: ws_(ws), device_(device) {}
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OPS_OP_INIT_CONTEXT_H_
#define MACE_CORE_OPS_OP_INIT_CONTEXT_H_
namespace mace {
class Workspace;
class Device;
// memory_optimizer, device
class OpInitContext {
public:
explicit OpInitContext(Workspace *ws, Device *device = nullptr);
~OpInitContext() = default;
Workspace *workspace() const {
return ws_;
}
void set_device(Device *device) {
device_ = device;
}
Device *device() const {
return device_;
}
private:
Workspace *ws_;
Device *device_;
};
} // namespace mace
#endif // MACE_CORE_OPS_OP_INIT_CONTEXT_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/ops/operator.h"
#include <vector>
#include "mace/core/ops/op_construct_context.h"
#include "mace/core/ops/op_init_context.h"
namespace mace {
Operation::Operation(OpConstructContext *context)
: operator_def_(context->operator_def()) {}
MaceStatus Operation::Init(OpInitContext *context) {
Workspace *ws = context->workspace();
for (const std::string &input_str : operator_def_->input()) {
const Tensor *tensor = ws->GetTensor(input_str);
MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(),
": Encountered a non-existing input tensor: ", input_str);
inputs_.push_back(tensor);
}
for (int i = 0; i < operator_def_->output_size(); ++i) {
const std::string output_str = operator_def_->output(i);
if (ws->HasTensor(output_str)) {
outputs_.push_back(ws->GetTensor(output_str));
} else {
MACE_CHECK(
operator_def_->output_type_size() == 0 ||
operator_def_->output_size() == operator_def_->output_type_size(),
"operator output size != operator output type size",
operator_def_->output_size(),
operator_def_->output_type_size());
DataType output_type;
if (i < operator_def_->output_type_size()) {
output_type = operator_def_->output_type(i);
} else {
output_type = static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DT_FLOAT)));
}
outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(
output_str, context->device()->allocator(), output_type)));
}
if (i < operator_def_->output_shape_size()) {
std::vector<index_t>
shape_configured(operator_def_->output_shape(i).dims_size());
for (size_t dim = 0; dim < shape_configured.size(); ++dim) {
shape_configured[dim] = operator_def_->output_shape(i).dims(dim);
}
ws->GetTensor(output_str)->SetShapeConfigured(shape_configured);
}
}
return MaceStatus::MACE_SUCCESS;
}
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OPS_OPERATOR_H_
#define MACE_CORE_OPS_OPERATOR_H_
#include <memory>
#include <string>
#include <vector>
#include "mace/core/arg_helper.h"
#include "mace/core/ops/op_construct_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_util.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
class OpInitContext;
// Conventions
// * If a format exists, NHWC is the default format
// * The input/output format of CPU ops with float data type is NCHW
// * The input/output format of GPU ops and CPU quantization ops is NHWC
// * Inputs' data type is the same as the operation's data type by default.
// * Outputs' data type is the same as the operation's data type by default.
class Operation {
public:
explicit Operation(OpConstructContext *context);
virtual ~Operation() = default;
template<typename T>
T GetOptionalArg(const std::string &name,
const T &default_value) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ProtoArgHelper::GetOptionalArg<OperatorDef, T>(
*operator_def_, name, default_value);
}
template<typename T>
std::vector<T> GetRepeatedArgs(
const std::string &name, const std::vector<T> &default_value = {}) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ProtoArgHelper::GetRepeatedArgs<OperatorDef, T>(
*operator_def_, name, default_value);
}
DeviceType device_type() const {
return static_cast<DeviceType>(operator_def_->device_type());
}
const Tensor *Input(unsigned int idx) {
MACE_CHECK(idx < inputs_.size());
return inputs_[idx];
}
Tensor *Output(int idx) { return outputs_[idx]; }
int InputSize() { return inputs_.size(); }
int OutputSize() { return outputs_.size(); }
const std::vector<const Tensor *> &Inputs() const { return inputs_; }
const std::vector<Tensor *> &Outputs() { return outputs_; }
// Run Op asynchronously (depends on device), return a future if not nullptr.
virtual MaceStatus Init(OpInitContext *);
virtual MaceStatus Run(OpContext *) = 0;
const OperatorDef &debug_def() const {
MACE_CHECK(has_debug_def(), "operator_def was null!");
return *operator_def_;
}
void set_debug_def(
const std::shared_ptr<OperatorDef> &operator_def) {
operator_def_ = operator_def;
}
bool has_debug_def() const { return operator_def_ != nullptr; }
inline std::shared_ptr<OperatorDef> operator_def() {
return operator_def_;
}
protected:
std::shared_ptr<OperatorDef> operator_def_;
std::vector<const Tensor *> inputs_;
std::vector<Tensor *> outputs_;
MACE_DISABLE_COPY_AND_ASSIGN(Operation);
};
// MACE_OP_INPUT_TAGS and MACE_OP_OUTPUT_TAGS are optional features to name the
// indices of the operator's inputs and outputs, in order to avoid confusion.
// For example, for a fully convolution layer that has input, weight and bias,
// you can define its input tags as:
// MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
// And in the code, instead of doing
// auto& weight = Input(1);
// you can now do
// auto& weight = Input(WEIGHT);
// to make it more clear.
#define MACE_OP_INPUT_TAGS(first_input, ...) \
enum _InputTags { first_input = 0, __VA_ARGS__ }
#define MACE_OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
} // namespace mace
#endif // MACE_CORE_OPS_OPERATOR_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/registry/op_delegator_registry.h"
#include <utility>
#include "mace/utils/logging.h"
namespace mace {
MaceStatus OpDelegatorRegistry::Register(const std::string &key,
DelegatorCreator creator) {
MACE_CHECK(registry_.count(key) == 0, "Key already registered: ", key);
registry_[key] = std::move(creator);
return MaceStatus::MACE_SUCCESS;
}
DelegatorCreator OpDelegatorRegistry::GetCreator(const std::string &key) const {
MACE_CHECK(registry_.count(key) > 0, key, " does not exist.");
return registry_.at(key);
}
template<> const char *DType<float>::name_ = "float";
template<> const char *DType<int>::name_ = "int";
template<> const char *DType<uint8_t>::name_ = "uint8_t";
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
#define MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/core/ops/op_delegator.h"
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
namespace mace {
typedef std::function<std::unique_ptr<OpDelegator>(const DelegatorParam &)>
DelegatorCreator;
class OpDelegatorRegistry {
public:
OpDelegatorRegistry() = default;
~OpDelegatorRegistry() = default;
MaceStatus Register(const std::string &key, DelegatorCreator creator);
DelegatorCreator GetCreator(const std::string &key) const;
private:
std::unordered_map<std::string, DelegatorCreator> registry_;
};
template<typename T>
struct DType { static const char *name_; };
template<> const char *DType<float>::name_;
template<> const char *DType<int>::name_;
template<> const char *DType<uint8_t>::name_;
} // namespace mace
#ifndef MACE_DELEGATOR_KEY_TMP
#define MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl) \
(std::string(#delegator_name"_"#device"_"#impl"_") + DType<DT>::name_)
#endif // MACE_DELEGATOR_KEY_TMP
#ifndef MACE_DELEGATOR_KEY
#define MACE_DELEGATOR_KEY(delegator_name, device, DT, impl) \
MACE_DELEGATOR_KEY_TMP(delegator_name, device, DT, impl)
#endif // MACE_DELEGATOR_KEY
#ifndef MACE_DELEGATOR_KEY_EX_TMP
#define MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag) \
(std::string(#delegator_name"_"#device"_"#impl"_"#tag"_") + DType<DT>::name_)
#endif // MACE_DELEGATOR_KEY_EX_TMP
#ifndef MACE_DELEGATOR_KEY_EX
#define MACE_DELEGATOR_KEY_EX(delegator_name, device, DT, impl, tag) \
MACE_DELEGATOR_KEY_EX_TMP(delegator_name, device, DT, impl, tag)
#endif // MACE_DELEGATOR_KEY_EX
#ifndef MACE_REGISTER_DELEGATOR
#define MACE_REGISTER_DELEGATOR(registry, class_name, param_name, key) \
void Register##class_name##Delegator(OpDelegatorRegistry *registry) { \
registry->Register( \
key, OpDelegator::DefaultCreator<class_name, param_name>); \
}
#endif // MACE_REGISTER_DELEGATOR
#ifndef MACE_DEFINE_DELEGATOR_CREATOR
#define MACE_DEFINE_DELEGATOR_CREATOR(class_name) \
static std::unique_ptr<class_name> Create( \
Workspace *workspace, const std::string &tag, \
const DelegatorParam &param) { \
DelegatorCreator creator = \
workspace->GetDelegatorRegistry()->GetCreator(tag); \
std::unique_ptr<OpDelegator> delegator = creator(param); \
return std::unique_ptr<class_name>( \
static_cast<class_name *>(delegator.release())); \
}
#endif // MACE_DEFINE_DELEGATOR_CREATOR
#endif // MACE_CORE_REGISTRY_OP_DELEGATOR_REGISTRY_H_
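For completeness, the consumer side (a hedged sketch, hypothetical names throughout): an op-facing delegator interface embeds `MACE_DEFINE_DELEGATOR_CREATOR`, and an op resolves the concrete implementation through the Workspace's delegator registry using the same `MACE_DELEGATOR_KEY` the implementation was registered under; `Workspace::GetDelegatorRegistry()` is assumed to exist, since the macro above calls it.
```c++
#include <memory>
#include <string>

#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"
#include "mace/core/workspace.h"  // assumed to provide GetDelegatorRegistry()

namespace mace {
namespace ops {

// Hypothetical op-facing delegator interface.
class MyGemv : public OpDelegator {
 public:
  explicit MyGemv(const DelegatorParam &param) : OpDelegator(param) {}

  // Injects:
  //   static std::unique_ptr<MyGemv> Create(Workspace *, const std::string &,
  //                                         const DelegatorParam &);
  // which looks the creator up via workspace->GetDelegatorRegistry().
  MACE_DEFINE_DELEGATOR_CREATOR(MyGemv)
};

// Inside a hypothetical op constructor, the registered implementation (NEON
// or REF, depending on MACE_CPU_IMPL_TYPE) would be resolved like this:
//
//   DelegatorParam param;
//   std::unique_ptr<MyGemv> gemv = MyGemv::Create(
//       context->workspace(),
//       MACE_DELEGATOR_KEY(MyGemv, DeviceType::CPU, float, MACE_CPU_IMPL_TYPE),
//       param);

}  // namespace ops
}  // namespace mace
```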
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/registry/op_registration_info.h"
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "mace/core/ops/op_condition_context.h"
namespace mace {
OpRegistrationInfo::OpRegistrationInfo() {
// default device type placer
device_placer = [this](OpConditionContext *context) -> std::set<DeviceType> {
MACE_UNUSED(context);
return this->devices;
};
// default input and output memory type setter
memory_type_setter = [](OpConditionContext *context) -> void {
if (context->device()->device_type() == DeviceType::GPU) {
#ifdef MACE_ENABLE_OPENCL
if (context->device()->gpu_runtime()->UseImageMemory()) {
context->set_output_mem_type(MemoryType::GPU_IMAGE);
} else {
context->set_output_mem_type(MemoryType::GPU_BUFFER);
}
#endif // MACE_ENABLE_OPENCL
} else {
context->set_output_mem_type(MemoryType::CPU_BUFFER);
}
};
data_format_selector = [](OpConditionContext *context)
-> std::vector<DataFormat> {
DataFormat op_data_format =
static_cast<DataFormat>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*context->operator_def(), "data_format",
static_cast<int>(DataFormat::NONE)));
return std::vector<DataFormat>(context->operator_def()->input_size(),
op_data_format);
};
}
void OpRegistrationInfo::AddDevice(DeviceType device) {
devices.insert(device);
}
void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) {
VLOG(3) << "Registering: " << key;
MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key);
creators[key] = std::move(creator);
}
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,40 +12,45 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_1X1_H_
#define MACE_OPS_ARM_FP32_CONV_2D_1X1_H_
#ifndef MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
#define MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/core/ops/operator.h"
#include "mace/proto/mace.pb.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class OpConstructContext;
class OpConditionContext;
class Conv2dK1x1 : public Conv2dBase {
class OpRegistrationInfo {
public:
Conv2dK1x1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
virtual ~Conv2dK1x1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
private:
Gemm gemm_;
};
typedef std::function<std::unique_ptr<Operation>(OpConstructContext *)>
OpCreator;
typedef std::function<std::set<DeviceType>(OpConditionContext *)>
DevicePlacer;
typedef std::function<void(OpConditionContext *)> MemoryTypeSetter;
typedef std::function<std::vector<DataFormat>(OpConditionContext *)>
DataFormatSelector;
OpRegistrationInfo();
} // namespace fp32
} // namespace arm
} // namespace ops
void AddDevice(DeviceType);
void Register(const std::string &key, OpCreator creator);
std::set<DeviceType> devices;
std::unordered_map<std::string, OpCreator> creators;
DevicePlacer device_placer;
MemoryTypeSetter memory_type_setter;
DataFormatSelector data_format_selector;
};
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_1X1_H_
#endif // MACE_CORE_REGISTRY_OP_REGISTRATION_INFO_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,153 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "mace/core/registry/ops_registry.h"
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "mace/core/operator.h"
namespace mace {
OpConditionContext::OpConditionContext(
const Workspace *ws,
OpConditionContext::TensorShapeMap *info)
: operator_def_(nullptr),
ws_(ws),
device_(nullptr),
tensor_shape_info_(info) {}
void OpConditionContext::set_operator_def(
const OperatorDef *operator_def) {
operator_def_ = operator_def;
input_data_types_.clear();
}
void OpConditionContext::SetInputInfo(size_t idx,
MemoryType mem_type,
DataType dt) {
if (input_mem_types_.empty()) {
// the default inputs' memory types are same as output memory type.
input_mem_types_.resize(operator_def_->input_size(), output_mem_type_);
}
if (input_data_types_.empty()) {
// the default inputs' data types are same as operation's data type.
DataType op_dt = static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DataType::DT_FLOAT)));
input_data_types_.resize(operator_def_->input_size(), op_dt);
}
MACE_CHECK(idx < input_mem_types_.size() && idx < input_data_types_.size());
input_mem_types_[idx] = mem_type;
input_data_types_[idx] = dt;
}
void OpConditionContext::set_output_mem_type(MemoryType type) {
MACE_CHECK(operator_def_ != nullptr);
output_mem_type_ = type;
input_mem_types_.clear();
}
MemoryType OpConditionContext::GetInputMemType(size_t idx) const {
if (input_mem_types_.empty()) {
return output_mem_type_;
}
MACE_CHECK(idx < input_mem_types_.size(),
idx, " < ", input_mem_types_.size());
return input_mem_types_[idx];
}
DataType OpConditionContext::GetInputDataType(size_t idx) const {
if (input_data_types_.empty()) {
// the default inputs' data types are same as operation's data type.
return static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DataType::DT_FLOAT)));
}
MACE_CHECK(idx < input_data_types_.size());
return input_data_types_[idx];
}
#ifdef MACE_ENABLE_OPENCL
void OpConditionContext::SetInputOpenCLBufferType(
size_t idx, OpenCLBufferType buffer_type) {
if (input_opencl_buffer_types_.empty()) {
// the default inputs' memory types are same as output memory type.
input_opencl_buffer_types_.resize(operator_def_->input_size(),
OpenCLBufferType::IN_OUT_CHANNEL);
}
MACE_CHECK(idx < input_opencl_buffer_types_.size());
input_opencl_buffer_types_[idx] = buffer_type;
}
OpenCLBufferType OpConditionContext::GetInputOpenCLBufferType(
size_t idx) const {
if (input_opencl_buffer_types_.empty()) {
return OpenCLBufferType::IN_OUT_CHANNEL;
}
MACE_CHECK(idx < input_opencl_buffer_types_.size());
return input_opencl_buffer_types_[idx];
}
#endif // MACE_ENABLE_OPENCL
OpConstructContext::OpConstructContext(Workspace *ws)
: operator_def_(nullptr),
ws_(ws),
device_(nullptr) {}
void OpConstructContext::set_operator_def(
std::shared_ptr<OperatorDef> operator_def) {
operator_def_ = operator_def;
}
OpInitContext::OpInitContext(Workspace *ws, Device *device)
: ws_(ws), device_(device) {}
Operation::Operation(OpConstructContext *context)
: operator_def_(context->operator_def()) {}
MaceStatus Operation::Init(OpInitContext *context) {
Workspace *ws = context->workspace();
for (const std::string &input_str : operator_def_->input()) {
const Tensor *tensor = ws->GetTensor(input_str);
MACE_CHECK(tensor != nullptr, "op ", operator_def_->type(),
": Encountered a non-existing input tensor: ", input_str);
inputs_.push_back(tensor);
}
for (int i = 0; i < operator_def_->output_size(); ++i) {
const std::string output_str = operator_def_->output(i);
if (ws->HasTensor(output_str)) {
outputs_.push_back(ws->GetTensor(output_str));
} else {
MACE_CHECK(
operator_def_->output_type_size() == 0 ||
operator_def_->output_size() == operator_def_->output_type_size(),
"operator output size != operator output type size",
operator_def_->output_size(),
operator_def_->output_type_size());
DataType output_type;
if (i < operator_def_->output_type_size()) {
output_type = operator_def_->output_type(i);
} else {
output_type = static_cast<DataType>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def_, "T", static_cast<int>(DT_FLOAT)));
}
outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(
output_str, context->device()->allocator(), output_type)));
}
if (i < operator_def_->output_shape_size()) {
std::vector<index_t>
shape_configured(operator_def_->output_shape(i).dims_size());
for (size_t dim = 0; dim < shape_configured.size(); ++dim) {
shape_configured[dim] = operator_def_->output_shape(i).dims(dim);
}
ws->GetTensor(output_str)->SetShapeConfigured(shape_configured);
}
}
return MaceStatus::MACE_SUCCESS;
}
// op registry
namespace {
class OpKeyBuilder {
public:
......@@ -203,51 +65,7 @@ const std::string OpKeyBuilder::Build() {
}
} // namespace
OpRegistrationInfo::OpRegistrationInfo() {
// default device type placer
device_placer = [this](OpConditionContext *context) -> std::set<DeviceType> {
MACE_UNUSED(context);
return this->devices;
};
// default input and output memory type setter
memory_type_setter = [](OpConditionContext *context) -> void {
if (context->device()->device_type() == DeviceType::GPU) {
#ifdef MACE_ENABLE_OPENCL
if (context->device()->gpu_runtime()->UseImageMemory()) {
context->set_output_mem_type(MemoryType::GPU_IMAGE);
} else {
context->set_output_mem_type(MemoryType::GPU_BUFFER);
}
#endif // MACE_ENABLE_OPENCL
} else {
context->set_output_mem_type(MemoryType::CPU_BUFFER);
}
};
data_format_selector = [](OpConditionContext *context)
-> std::vector<DataFormat> {
DataFormat op_data_format =
static_cast<DataFormat>(
ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*context->operator_def(), "data_format",
static_cast<int>(DataFormat::NONE)));
return std::vector<DataFormat>(context->operator_def()->input_size(),
op_data_format);
};
}
void OpRegistrationInfo::AddDevice(DeviceType device) {
devices.insert(device);
}
void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) {
VLOG(3) << "Registering: " << key;
MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key);
creators[key] = creator;
}
MaceStatus OpRegistryBase::Register(
MaceStatus OpRegistry::Register(
const std::string &op_type,
const DeviceType device_type,
const DataType dt,
......@@ -266,7 +84,7 @@ MaceStatus OpRegistryBase::Register(
return MaceStatus::MACE_SUCCESS;
}
MaceStatus OpRegistryBase::Register(
MaceStatus OpRegistry::Register(
const OpConditionBuilder &builder) {
std::string op_type = builder.type();
if (registry_.count(op_type) == 0) {
......@@ -277,7 +95,7 @@ MaceStatus OpRegistryBase::Register(
return MaceStatus::MACE_SUCCESS;
}
const std::set<DeviceType> OpRegistryBase::AvailableDevices(
const std::set<DeviceType> OpRegistry::AvailableDevices(
const std::string &op_type, OpConditionContext *context) const {
MACE_CHECK(registry_.count(op_type) != 0,
op_type, " operation is not registered.");
......@@ -285,7 +103,7 @@ const std::set<DeviceType> OpRegistryBase::AvailableDevices(
return registry_.at(op_type)->device_placer(context);
}
void OpRegistryBase::GetInOutMemoryTypes(
void OpRegistry::GetInOutMemoryTypes(
const std::string &op_type,
OpConditionContext *context) const {
MACE_CHECK(registry_.count(op_type) != 0,
......@@ -293,7 +111,7 @@ void OpRegistryBase::GetInOutMemoryTypes(
return registry_.at(op_type)->memory_type_setter(context);
}
const std::vector<DataFormat> OpRegistryBase::InputsDataFormat(
const std::vector<DataFormat> OpRegistry::InputsDataFormat(
const std::string &op_type,
OpConditionContext *context) const {
MACE_CHECK(registry_.count(op_type) != 0,
......@@ -301,7 +119,7 @@ const std::vector<DataFormat> OpRegistryBase::InputsDataFormat(
return registry_.at(op_type)->data_format_selector(context);
}
std::unique_ptr<Operation> OpRegistryBase::CreateOperation(
std::unique_ptr<Operation> OpRegistry::CreateOperation(
OpConstructContext *context,
DeviceType device_type) const {
auto operator_def = context->operator_def();
......@@ -328,44 +146,4 @@ std::unique_ptr<Operation> OpRegistryBase::CreateOperation(
return registry_.at(op_type)->creators.at(key)(context);
}
OpConditionBuilder::OpConditionBuilder(const std::string &type)
: type_(type) {}
const std::string OpConditionBuilder::type() const {
return type_;
}
OpConditionBuilder &OpConditionBuilder::SetDevicePlacerFunc(
OpRegistrationInfo::DevicePlacer placer) {
placer_ = placer;
return *this;
}
OpConditionBuilder &OpConditionBuilder::SetInputMemoryTypeSetter(
OpRegistrationInfo::MemoryTypeSetter setter) {
memory_type_setter_ = setter;
return *this;
}
OpConditionBuilder &OpConditionBuilder::SetInputsDataFormatSelector(
OpRegistrationInfo::DataFormatSelector selector) {
data_format_selector_ = selector;
return *this;
}
void OpConditionBuilder::Finalize(OpRegistrationInfo *info) const {
if (info != nullptr) {
if (placer_) {
info->device_placer = placer_;
}
if (memory_type_setter_) {
info->memory_type_setter = memory_type_setter_;
}
if (data_format_selector_) {
info->data_format_selector = data_format_selector_;
}
}
}
} // namespace mace
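
For illustration (not part of this change): the condition builder above is how an op overrides the defaults installed by `OpRegistrationInfo`. A minimal sketch that pins a hypothetical op type to CPU, using the `MACE_REGISTER_OP_CONDITION` helper from the header that follows:

```c++
#include "mace/core/registry/ops_registry.h"

namespace mace {
namespace ops {

// "MyCpuOnlyOp" is a hypothetical op type used only for illustration.
void RegisterMyCpuOnlyOp(OpRegistry *op_registry) {
  MACE_REGISTER_OP_CONDITION(
      op_registry,
      OpConditionBuilder("MyCpuOnlyOp")
          .SetDevicePlacerFunc(
              [](OpConditionContext *context) -> std::set<DeviceType> {
                MACE_UNUSED(context);
                // Restrict this op to the CPU runtime regardless of the
                // device requested for the net.
                return {DeviceType::CPU};
              }));
}

}  // namespace ops
}  // namespace mace
```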
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_REGISTRY_OPS_REGISTRY_H_
#define MACE_CORE_REGISTRY_OPS_REGISTRY_H_
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/core/ops/operator.h"
#include "mace/core/ops/op_condition_builder.h"
#include "mace/core/ops/op_condition_context.h"
#include "mace/public/mace.h"
#include "mace/proto/mace.pb.h"
#include "mace/utils/memory.h"
namespace mace {
class OpRegistry {
public:
OpRegistry() = default;
virtual ~OpRegistry() = default;
MaceStatus Register(const std::string &op_type,
const DeviceType device_type,
const DataType dt,
OpRegistrationInfo::OpCreator creator);
MaceStatus Register(const OpConditionBuilder &builder);
const std::set<DeviceType> AvailableDevices(
const std::string &op_type, OpConditionContext *context) const;
void GetInOutMemoryTypes(
const std::string &op_type, OpConditionContext *context) const;
const std::vector<DataFormat> InputsDataFormat(
const std::string &op_type, OpConditionContext *context) const;
std::unique_ptr<Operation> CreateOperation(
OpConstructContext *context,
DeviceType device_type) const;
template<class DerivedType>
static std::unique_ptr<Operation> DefaultCreator(
OpConstructContext *context) {
return make_unique<DerivedType>(context);
}
private:
std::unordered_map<std::string, std::unique_ptr<OpRegistrationInfo>>
registry_;
MACE_DISABLE_COPY_AND_ASSIGN(OpRegistry);
};
#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \
op_registry->Register(op_type, \
device, \
DataTypeToEnum<dt>::value, \
OpRegistry::DefaultCreator<class_name<device, dt>>)
#define MACE_REGISTER_OP_BY_CLASS(\
op_registry, op_type, class_name, device, dt) \
op_registry->Register(op_type, \
device, \
DataTypeToEnum<dt>::value, \
OpRegistry::DefaultCreator<class_name>)
#ifdef MACE_ENABLE_OPENCL
#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name) \
op_registry->Register( \
op_type, \
DeviceType::GPU, \
DT_FLOAT, \
OpRegistry::DefaultCreator<class_name<DeviceType::GPU, float>>)
#else
#define MACE_REGISTER_GPU_OP(op_registry, op_type, class_name)
#endif
#define MACE_REGISTER_OP_CONDITION(op_registry, builder) \
op_registry->Register(builder)
} // namespace mace
#endif // MACE_CORE_REGISTRY_OPS_REGISTRY_H_
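
The header above also provides `MACE_REGISTER_OP_BY_CLASS` for op implementations that are a single class rather than a `<DeviceType, T>` template; a minimal sketch with a hypothetical class:

```c++
// Hypothetical op class: MyShapeOp derives from Operation and takes an
// OpConstructContext* in its constructor, but is not templated on
// device type or data type.
void RegisterMyShapeOp(OpRegistry *op_registry) {
  MACE_REGISTER_OP_BY_CLASS(op_registry, "MyShapeOp", MyShapeOp,
                            DeviceType::CPU, float);
}
```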
......@@ -46,7 +46,7 @@ bool HasHalfTensor(const NetDef &net_def) {
return false;
}
template <typename T>
template<typename T>
void DequantizeTensor(Device *device,
const unsigned char *model_data,
const ConstTensor &const_tensor,
......@@ -66,7 +66,8 @@ void DequantizeTensor(Device *device,
} // namespace
Workspace::Workspace() = default;
Workspace::Workspace(const OpDelegatorRegistry *registry) :
op_delegator_registry_(registry) {}
Tensor *Workspace::CreateTensor(const std::string &name,
Allocator *alloc,
......@@ -144,7 +145,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
DataType dst_data_type = const_tensor.data_type();
if (device_type == DeviceType::CPU &&
const_tensor.data_type() == DataType::DT_HALF) {
const_tensor.data_type() == DataType::DT_HALF) {
dst_data_type = DataType::DT_FLOAT;
} else if (!is_quantize_model && const_tensor.quantized()) {
if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) {
......@@ -173,13 +174,13 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
if (device_type == DeviceType::CPU &&
const_tensor.data_type() == DataType::DT_HALF) {
// uncompress the weights of fp16
auto org_data = reinterpret_cast<const half *>(
model_data + const_tensor.offset());
float *dst_data = tensor->mutable_data<float>();
for (int i = 0; i < const_tensor.data_size(); ++i) {
dst_data[i] = half_float::half_cast<float>(org_data[i]);
}
// uncompress the weights of fp16
auto org_data = reinterpret_cast<const half *>(
model_data + const_tensor.offset());
float *dst_data = tensor->mutable_data<float>();
for (int i = 0; i < const_tensor.data_size(); ++i) {
dst_data[i] = half_float::half_cast<float>(org_data[i]);
}
} else if (!is_quantize_model && const_tensor.quantized()) {
// uncompress the weights of uint8
if (dst_data_type != DT_FLOAT) {
......@@ -401,4 +402,8 @@ void Workspace::RemoveTensor(const std::string &name) {
}
}
const OpDelegatorRegistry *Workspace::GetDelegatorRegistry() const {
return op_delegator_registry_;
}
} // namespace mace
......@@ -27,13 +27,14 @@
namespace mace {
class OpDelegatorRegistry;
class MemoryOptimizer;
class Workspace {
public:
typedef std::map<std::string, std::unique_ptr<Tensor>> TensorMap;
Workspace();
explicit Workspace(const OpDelegatorRegistry *registry);
~Workspace() {}
Tensor *CreateTensor(const std::string &name,
......@@ -71,15 +72,16 @@ class Workspace {
void RemoveTensor(const std::string &name);
const OpDelegatorRegistry *GetDelegatorRegistry() const;
private:
TensorMap tensor_map_;
std::unique_ptr<BufferBase> tensor_buffer_;
PreallocatedPooledAllocator preallocated_allocator_;
bool diffused_buffer_;
const OpDelegatorRegistry *op_delegator_registry_;
MACE_DISABLE_COPY_AND_ASSIGN(Workspace);
};
......
......@@ -19,8 +19,10 @@
#include "mace/core/device_context.h"
#include "mace/core/memory_optimizer.h"
#include "mace/core/net.h"
#include "mace/ops/registry/ops_registry.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/registry/op_delegator_registry.h"
#include "mace/ops/common/transpose.h"
#include "mace/ops/registry/registry.h"
#include "mace/utils/math.h"
#include "mace/utils/memory.h"
#include "mace/utils/stl_util.h"
......@@ -451,7 +453,8 @@ class MaceEngine::Impl {
private:
std::unique_ptr<port::ReadOnlyMemoryRegion> model_data_;
std::unique_ptr<OpRegistryBase> op_registry_;
std::unique_ptr<OpRegistry> op_registry_;
std::unique_ptr<OpDelegatorRegistry> op_delegator_registry_;
DeviceType device_type_;
std::unique_ptr<Device> device_;
std::unique_ptr<Workspace> ws_;
......@@ -478,9 +481,10 @@ class MaceEngine::Impl {
MaceEngine::Impl::Impl(const MaceEngineConfig &config)
: model_data_(nullptr),
op_registry_(new OpRegistry),
op_delegator_registry_(new OpDelegatorRegistry),
device_type_(config.impl_->device_type()),
device_(nullptr),
ws_(new Workspace()),
ws_(new Workspace(op_delegator_registry_.get())),
net_(nullptr),
is_quantized_model_(false),
thread_pool_(new utils::ThreadPool(config.impl_->num_threads(),
......@@ -498,6 +502,8 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
#endif
{
LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion();
ops::RegisterAllOps(op_registry_.get());
ops::RegisterAllOpDelegators(op_delegator_registry_.get());
thread_pool_->Init();
if (device_type_ == DeviceType::CPU) {
device_.reset(new CPUDevice(config.impl_->num_threads(),
......
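
To summarize the wiring introduced here: the engine owns both registries, fills them once through the generated entry points, and hands the delegator registry to the workspace as a raw pointer, so the registries must outlive the workspace. A condensed sketch of the same setup outside `MaceEngine` (the `mace/core/workspace.h` path is assumed; device setup and error handling omitted):

```c++
#include <memory>

#include "mace/core/registry/op_delegator_registry.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/workspace.h"
#include "mace/ops/registry/registry.h"
#include "mace/utils/memory.h"

namespace mace {

// The caller keeps ownership of both registries; the workspace only stores
// a raw pointer to the delegator registry.
std::unique_ptr<Workspace> SetUpWorkspace(
    OpRegistry *op_registry, OpDelegatorRegistry *op_delegator_registry) {
  ops::RegisterAllOps(op_registry);
  ops::RegisterAllOpDelegators(op_delegator_registry);
  return make_unique<Workspace>(op_delegator_registry);
}

}  // namespace mace
```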
......@@ -22,11 +22,13 @@ cc_library(
srcs = glob(
[
"common/*.cc",
"delegator/*.cc",
],
),
hdrs = glob(
[
"common/*.h",
"delegator/*.h",
],
),
copts = [
......@@ -58,12 +60,16 @@ cc_library(
[
"ref/*.cc",
],
),
) + if_quantize_enabled(glob([
"ref/q8/*.cc",
])),
hdrs = glob(
[
"ref/*.h",
],
),
) + if_quantize_enabled(glob([
"ref/q8/*.h",
])),
copts = [
"-Werror",
"-Wextra",
......@@ -236,12 +242,12 @@ cc_library(
cc_library(
name = "ops",
srcs = [
"registry/ops_registry.cc",
],
hdrs = [
"registry/ops_registry.h",
],
srcs = glob([
"registry/*.cc",
]),
hdrs = glob([
"registry/*.h",
]),
copts = [
"-Werror",
"-Wextra",
......
file(GLOB OPS_COMMON_SRCS common/*.cc)
file(GLOB OPS_REF_KERNELS_SRCS ref/*.cc)
file(GLOB OPS_REF_Q8_KERNELS_SRCS
ref/q8/*.cc
)
file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS
arm/fp32/*.cc
)
......@@ -17,20 +21,23 @@ file(GLOB OPS_OPENCL_KERNELS_SRCS
file(GLOB OPS_INTERNAL_OPS_SRCS *.cc)
set(OPS_SRCS registry/ops_registry.cc)
set(OPS_SRCS registry/ops_registry.cc registry/op_delegators_registry.cc)
set(OPS_SRCS ${OPS_SRCS} ${OPS_COMMON_SRCS})
set(OPS_SRCS ${OPS_SRCS} ${OPS_INTERNAL_OPS_SRCS})
# TODO we need to remove this in production build
set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_KERNELS_SRCS})
if(MACE_ENABLE_QUANTIZE)
set(OPS_SRCS ${OPS_SRCS} ${OPS_REF_Q8_KERNELS_SRCS})
endif(MACE_ENABLE_QUANTIZE)
if(MACE_ENABLE_NEON)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS})
if(MACE_ENABLE_QUANTIZE)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS})
endif(MACE_ENABLE_QUANTIZE)
endif(MACE_ENABLE_NEON)
if(MACE_ENABLE_QUANTIZE)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS})
endif(MACE_ENABLE_QUANTIZE)
if(MACE_ENABLE_OPENCL)
set(OPS_SRCS ${OPS_SRCS} ${OPS_OPENCL_KERNELS_SRCS})
endif(MACE_ENABLE_OPENCL)
......
......@@ -17,13 +17,10 @@
#include <memory>
#include <set>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#if defined(MACE_ENABLE_NEON)
#include "mace/ops/arm/fp32/activation.h"
#else
#include "mace/ops/ref/activation.h"
#endif
#include "mace/ops/delegator/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
......@@ -37,19 +34,20 @@ namespace ops {
template<DeviceType D, class T>
class ActivationOp;
template<>
class ActivationOp<DeviceType::CPU, float> : public Operation {
template<typename T>
class ActivationOp<DeviceType::CPU, T> : public Operation {
public:
explicit ActivationOp(OpConstructContext *context)
: Operation(context),
activation_type_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
activation_delegator_(activation_type_,
Operation::GetOptionalArg<float>("max_limit",
0.0f),
Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
activation_delegator_(delegator::Activation::Create(
context->workspace(),
MACE_DELEGATOR_KEY(Activation, CPU, T, MACE_CPU_IMPL_TYPE),
delegator::ActivationParam(
activation_type_,
Operation::GetOptionalArg<T>("max_limit", 0),
Operation::GetOptionalArg<T>("leakyrelu_coefficient", 0)))) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -58,28 +56,24 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
if (activation_type_ == PRELU) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
const float *input_ptr = input->data<float>();
float *output_ptr = output->mutable_data<float>();
const T *input_ptr = input->data<T>();
T *output_ptr = output->mutable_data<T>();
MACE_CHECK(this->InputSize() > 1);
const Tensor *alpha = this->Input(1);
const float *alpha_ptr = alpha->data<float>();
const T *alpha_ptr = alpha->data<T>();
const index_t outer_size = output->dim(0);
const index_t inner_size = output->dim(2) * output->dim(3);
PReLUActivation(context, input_ptr, outer_size, input->dim(1), inner_size,
alpha_ptr, output_ptr);
} else {
activation_delegator_.Compute(context, input, output);
activation_delegator_->Compute(context, input, output);
}
return MaceStatus::MACE_SUCCESS;
}
private:
ActivationType activation_type_;
#if defined(MACE_ENABLE_NEON)
arm::fp32::Activation activation_delegator_;
#else
ref::Activation activation_delegator_;
#endif // MACE_ENABLE_NEON
std::unique_ptr<delegator::Activation> activation_delegator_;
};
#ifdef MACE_ENABLE_OPENCL
......@@ -122,7 +116,7 @@ class ActivationOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterActivation(OpRegistryBase *op_registry) {
void RegisterActivation(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Activation", ActivationOp,
DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "Activation", ActivationOp);
......
......@@ -20,7 +20,7 @@
#include <string>
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/ops/common/activation_type.h"
#include "mace/utils/logging.h"
......
......@@ -19,7 +19,8 @@
#include <algorithm>
#include <memory>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/addn.h"
......@@ -92,7 +93,7 @@ class AddNOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterAddN(OpRegistryBase *op_registry) {
void RegisterAddN(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "AddN", AddNOp, DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "AddN", AddNOp);
MACE_REGISTER_OP_CONDITION(
......
......@@ -18,7 +18,8 @@
#include <memory>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
namespace mace {
namespace ops {
......@@ -109,7 +110,7 @@ class ArgMaxOp : public Operation {
void RegisterArgMax(OpRegistryBase *op_registry) {
void RegisterArgMax(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "ArgMax", ArgMaxOp,
DeviceType::CPU, float);
}
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/activation.h"
#include "mace/ops/delegator/activation.h"
#include <arm_neon.h>
#include <algorithm>
......@@ -22,16 +22,22 @@ namespace ops {
namespace arm {
namespace fp32 {
Activation::Activation(ActivationType type,
const float limit,
const float leakyrelu_coefficient)
: type_(type),
limit_(limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
class Activation : public delegator::Activation {
public:
explicit Activation(const delegator::ActivationParam &param)
: delegator::Activation(param) {}
~Activation() = default;
MaceStatus Compute(const OpContext *context,
const Tensor *input, Tensor *output) override;
private:
void DoActivation(const OpContext *context,
const Tensor *input, Tensor *output);
};
MaceStatus Activation::Compute(const OpContext *context,
const Tensor *input,
Tensor *output) {
const Tensor *input, Tensor *output) {
Tensor::MappingGuard input_guard(input);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
......@@ -139,7 +145,7 @@ void Activation::DoActivation(const OpContext *context,
// remain
for (index_t i = block_count * 4; i < size; ++i) {
output_data[i] = std::max(input_data[i], 0.f) +
std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
}
break;
......@@ -169,14 +175,19 @@ void Activation::DoActivation(const OpContext *context,
break;
}
case NOOP:
case NOOP: {
break;
}
default:
default: {
MACE_NOT_IMPLEMENTED;
}
}
}
MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam,
MACE_DELEGATOR_KEY(Activation, CPU, float, NEON))
} // namespace fp32
} // namespace arm
} // namespace ops
......
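
With the registration above, the NEON kernel is selected by key rather than by `#ifdef MACE_ENABLE_NEON` in every op. The portable reference kernel (`mace/ops/ref/activation.cc`, not shown in this hunk) presumably mirrors it under a `REF` tag, which is what `MACE_CPU_IMPL_TYPE` in the op code resolves against; a sketch of that assumed counterpart:

```c++
// Assumed shape of the reference-kernel registration (not part of this hunk):
// same delegator name and param type, different implementation tag in the key.
MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam,
                        MACE_DELEGATOR_KEY(Activation, CPU, float, REF))
```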
......@@ -12,15 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/bias_add.h"
#include <arm_neon.h>
#include "mace/ops/delegator/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class BiasAdd : public delegator::BiasAdd {
public:
explicit BiasAdd(const DelegatorParam &param) : delegator::BiasAdd(param) {}
~BiasAdd() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) override;
private:
void AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output);
};
MaceStatus BiasAdd::Compute(const OpContext *context,
const Tensor *input,
const Tensor *bias,
......@@ -117,6 +129,9 @@ void BiasAdd::AddBias(const OpContext *context,
}
}
MACE_REGISTER_DELEGATOR(registry, BiasAdd, DelegatorParam,
MACE_DELEGATOR_KEY(BiasAdd, CPU, float, NEON))
} // namespace fp32
} // namespace arm
} // namespace ops
......
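
On the consumer side, the expected pattern matches `ActivationOp` earlier in this commit: the CPU op resolves the delegator through the workspace by key instead of holding `arm::fp32::BiasAdd` directly. A hedged sketch of that initializer (the `BiasAddOp` change itself is truncated at the end of this section; default construction of `DelegatorParam` is assumed):

```c++
// Sketch only: mirrors the Activation/BatchNorm delegator creation in this
// commit. BiasAdd carries no extra parameters, so the base DelegatorParam
// is used, matching the registration above.
std::unique_ptr<delegator::BiasAdd> bias_add_delegator =
    delegator::BiasAdd::Create(
        context->workspace(),
        MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE),
        DelegatorParam());
```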
......@@ -18,36 +18,25 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dBase {
class Conv2dBase : public delegator::Conv2d {
public:
Conv2dBase(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type)
: strides_(strides),
dilations_(dilations),
paddings_(paddings),
padding_type_(padding_type) {}
explicit Conv2dBase(const delegator::Conv2dParam &param)
: delegator::Conv2d(param) {}
virtual ~Conv2dBase() = default;
virtual MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) = 0;
protected:
void CalOutputShapeAndInputPadSize(const std::vector<index_t> &input_shape,
const std::vector<index_t> &filter_shape,
......@@ -83,11 +72,6 @@ class Conv2dBase {
const int pad_left,
Tensor *dst);
void UnPadOutput(const Tensor &src, Tensor *dst);
const std::vector<int> strides_;
const std::vector<int> dilations_;
const std::vector<int> paddings_;
const Padding padding_type_;
};
} // namespace fp32
......
......@@ -12,13 +12,32 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/conv_2d_1x1.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK1x1 : public Conv2dBase {
public:
explicit Conv2dK1x1(const delegator::Conv2dParam &param)
: Conv2dBase(param),
gemm_(delegator::GemmParam()) {}
virtual ~Conv2dK1x1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
private:
Gemm gemm_;
};
MaceStatus Conv2dK1x1::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
......@@ -94,6 +113,9 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
output);
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK1x1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x1))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -17,6 +17,8 @@
#include <arm_neon.h>
#include <memory>
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
......@@ -859,6 +861,19 @@ MaceStatus Conv2dK15x1S1::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK1x7S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K1x7S1))
MACE_REGISTER_DELEGATOR(registry, Conv2dK7x1S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x1S1))
MACE_REGISTER_DELEGATOR(registry, Conv2dK1x15S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
NEON, K1x15S1))
MACE_REGISTER_DELEGATOR(registry, Conv2dK15x1S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
NEON, K15x1S1))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -16,10 +16,11 @@
#define MACE_OPS_ARM_FP32_CONV_2D_1XN_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -28,8 +29,8 @@ namespace fp32 {
class Conv2dK1x7S1 : public Conv2dBase {
public:
Conv2dK1x7S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK1x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK1x7S1() {}
MaceStatus Compute(
......@@ -41,8 +42,8 @@ class Conv2dK1x7S1 : public Conv2dBase {
class Conv2dK7x1S1 : public Conv2dBase {
public:
Conv2dK7x1S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK7x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK7x1S1() {}
MaceStatus Compute(
......@@ -54,8 +55,8 @@ class Conv2dK7x1S1 : public Conv2dBase {
class Conv2dK1x15S1 : public Conv2dBase {
public:
Conv2dK1x15S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK1x15S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK1x15S1() {}
MaceStatus Compute(
......@@ -67,8 +68,8 @@ class Conv2dK1x15S1 : public Conv2dBase {
class Conv2dK15x1S1 : public Conv2dBase {
public:
Conv2dK15x1S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK15x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK15x1S1() {}
MaceStatus Compute(
......
......@@ -17,6 +17,8 @@
#include <arm_neon.h>
#include <memory>
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
......@@ -735,6 +737,11 @@ MaceStatus Conv2dK3x3S2::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S1))
MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3S2, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K3x3S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -16,10 +16,11 @@
#define MACE_OPS_ARM_FP32_CONV_2D_3X3_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -28,8 +29,8 @@ namespace fp32 {
class Conv2dK3x3S1 : public Conv2dBase {
public:
Conv2dK3x3S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK3x3S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK3x3S1() {}
MaceStatus Compute(
......@@ -41,8 +42,8 @@ class Conv2dK3x3S1 : public Conv2dBase {
class Conv2dK3x3S2 : public Conv2dBase {
public:
Conv2dK3x3S2(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK3x3S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK3x3S2() {}
MaceStatus Compute(
......
......@@ -17,6 +17,7 @@
#include <algorithm>
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
......@@ -800,6 +801,10 @@ void Conv2dK3x3Winograd::TransformOutput8x8(const OpContext *context,
}, 0, batch, 1, 0, out_channels, 1);
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK3x3Winograd, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(
Conv2d, CPU, float, NEON, K3x3Winograd))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,11 +18,11 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -31,10 +31,9 @@ namespace fp32 {
class Conv2dK3x3Winograd : public Conv2dBase {
public:
Conv2dK3x3Winograd(const std::vector<int> &paddings,
const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type),
gemm_(),
explicit Conv2dK3x3Winograd(const delegator::Conv2dParam &param)
: Conv2dBase(param),
gemm_(delegator::GemmParam()),
transformed_filter_(nullptr),
out_tile_size_(0) {}
......
......@@ -12,16 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/conv_2d_5x5.h"
#include <arm_neon.h>
#include <memory>
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK5x5S1 : public Conv2dBase {
public:
explicit Conv2dK5x5S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK5x5S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
};
#define MACE_Conv2dNeonK5x5SnLoadCalc4 \
/* load filter (4 outch x 1 height x 4 width) */ \
float32x4_t vf00, vf10, vf20, vf30; \
......@@ -244,6 +258,9 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK5x5S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K5x5S1))
} // namespace fp32
} // namespace arm
} // namespace ops
......
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_5X5_H_
#define MACE_OPS_ARM_FP32_CONV_2D_5X5_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK5x5S1 : public Conv2dBase {
public:
Conv2dK5x5S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
virtual ~Conv2dK5x5S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_5X5_H_
......@@ -17,6 +17,8 @@
#include <arm_neon.h>
#include <memory>
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
......@@ -720,6 +722,13 @@ MaceStatus Conv2dK7x7S3::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S1))
MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S2, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S2))
MACE_REGISTER_DELEGATOR(registry, Conv2dK7x7S3, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, K7x7S3))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -16,10 +16,11 @@
#define MACE_OPS_ARM_FP32_CONV_2D_7X7_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -28,8 +29,8 @@ namespace fp32 {
class Conv2dK7x7S1 : public Conv2dBase {
public:
Conv2dK7x7S1(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK7x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK7x7S1() {}
MaceStatus Compute(
......@@ -41,8 +42,8 @@ class Conv2dK7x7S1 : public Conv2dBase {
class Conv2dK7x7S2 : public Conv2dBase {
public:
Conv2dK7x7S2(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK7x7S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK7x7S2() {}
MaceStatus Compute(
......@@ -54,8 +55,8 @@ class Conv2dK7x7S2 : public Conv2dBase {
class Conv2dK7x7S3 : public Conv2dBase {
public:
Conv2dK7x7S3(const std::vector<int> &paddings, const Padding padding_type)
: Conv2dBase({3, 3}, {1, 1}, paddings, padding_type) {}
explicit Conv2dK7x7S3(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK7x7S3() {}
MaceStatus Compute(
......
......@@ -12,15 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/conv_general.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include <memory>
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dGeneral : public Conv2dBase {
public:
explicit Conv2dGeneral(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
};
MaceStatus Conv2dGeneral::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
......@@ -237,6 +252,10 @@ MaceStatus Conv2dGeneral::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(
registry, Conv2dGeneral, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float, NEON, General))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,54 +18,27 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dBase {
class Deconv2dBase : public delegator::Deconv2d {
public:
Deconv2dBase(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const index_t group,
const FrameworkType framework_type)
: strides_(strides),
dilations_(dilations),
paddings_(paddings),
padding_type_(padding_type),
group_(group),
framework_type_(framework_type) {}
Deconv2dBase(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase(strides,
dilations,
paddings,
padding_type,
1,
framework_type) {}
explicit Deconv2dBase(const delegator::Deconv2dParam &param)
: delegator::Deconv2d(param),
group_(param.group_) {}
virtual ~Deconv2dBase() = default;
virtual MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) = 0;
protected:
MaceStatus ResizeOutAndPadOut(const OpContext *context,
const Tensor *input,
......@@ -78,13 +51,7 @@ class Deconv2dBase {
void UnPadOutput(const Tensor &src,
const std::vector<int> &out_pad_size,
Tensor *dst);
const std::vector<int> strides_;
const std::vector<int> dilations_;
const std::vector<int> paddings_;
const Padding padding_type_;
index_t group_;
const FrameworkType framework_type_;
};
} // namespace fp32
......
......@@ -330,12 +330,18 @@ MaceStatus Deconv2dK2x2S2::Compute(const OpContext *context,
}
}, 0, batch, 1, 0, outch, 1);
UnPadOutput(*out_tensor, out_pad_size, output);
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S1, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K2x2S1))
MACE_REGISTER_DELEGATOR(registry, Deconv2dK2x2S2, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K2x2S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,12 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,10 +32,8 @@ namespace fp32 {
class Deconv2dK2x2S1 : public Deconv2dBase {
public:
Deconv2dK2x2S1(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK2x2S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK2x2S1() {}
MaceStatus Compute(
......@@ -48,10 +46,8 @@ class Deconv2dK2x2S1 : public Deconv2dBase {
class Deconv2dK2x2S2 : public Deconv2dBase {
public:
Deconv2dK2x2S2(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK2x2S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK2x2S2() {}
MaceStatus Compute(
......
......@@ -464,6 +464,13 @@ MaceStatus Deconv2dK3x3S2::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S1, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K3x3S1))
MACE_REGISTER_DELEGATOR(registry, Deconv2dK3x3S2, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K3x3S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,12 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,10 +32,8 @@ namespace fp32 {
class Deconv2dK3x3S1 : public Deconv2dBase {
public:
Deconv2dK3x3S1(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK3x3S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK3x3S1() {}
MaceStatus Compute(
......@@ -48,10 +46,8 @@ class Deconv2dK3x3S1 : public Deconv2dBase {
class Deconv2dK3x3S2 : public Deconv2dBase {
public:
Deconv2dK3x3S2(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK3x3S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK3x3S2() {}
MaceStatus Compute(
......
......@@ -449,7 +449,6 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context,
const index_t outw = out_shape[3];
const index_t out_img_size = outh * outw;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
......@@ -575,6 +574,13 @@ MaceStatus Deconv2dK4x4S2::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S1, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K4x4S1))
MACE_REGISTER_DELEGATOR(registry, Deconv2dK4x4S2, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, K4x4S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,12 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,10 +32,8 @@ namespace fp32 {
class Deconv2dK4x4S1 : public Deconv2dBase {
public:
Deconv2dK4x4S1(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({1, 1}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK4x4S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK4x4S1() {}
MaceStatus Compute(
......@@ -48,10 +46,8 @@ class Deconv2dK4x4S1 : public Deconv2dBase {
class Deconv2dK4x4S2 : public Deconv2dBase {
public:
Deconv2dK4x4S2(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({2, 2}, {1, 1}, paddings, padding_type, framework_type) {}
explicit Deconv2dK4x4S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dK4x4S2() {}
MaceStatus Compute(
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/deconv_2d_general.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
// TODO(liutuo): optimize it
......@@ -21,6 +21,20 @@ namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dGeneral : public Deconv2dBase {
public:
explicit Deconv2dGeneral(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
};
MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
......@@ -110,6 +124,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Deconv2dGeneral, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
NEON, General))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -512,6 +512,13 @@ MaceStatus DepthwiseConv2dK3x3S2::Compute(const mace::OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S1, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S1))
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S2, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, CPU, float, NEON, K3x3S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -16,10 +16,12 @@
#define MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_
#include <vector>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/delegator/depthwise_conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -28,9 +30,8 @@ namespace fp32 {
class DepthwiseConv2dK3x3S1 : public Conv2dBase {
public:
DepthwiseConv2dK3x3S1(const std::vector<int> &paddings,
const Padding padding_type)
: Conv2dBase({1, 1}, {1, 1}, paddings, padding_type) {}
explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {}
virtual ~DepthwiseConv2dK3x3S1() {}
MaceStatus Compute(
......@@ -42,9 +43,8 @@ class DepthwiseConv2dK3x3S1 : public Conv2dBase {
class DepthwiseConv2dK3x3S2 : public Conv2dBase {
public:
DepthwiseConv2dK3x3S2(const std::vector<int> &paddings,
const Padding padding_type)
: Conv2dBase({2, 2}, {1, 1}, paddings, padding_type) {}
explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {}
virtual ~DepthwiseConv2dK3x3S2() {}
MaceStatus Compute(
......
......@@ -776,6 +776,20 @@ MaceStatus GroupDeconv2dK3x3S2::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S1, delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S1))
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S2, delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K3x3S2))
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S1, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S1))
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S2, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K3x3S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,13 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,14 +33,9 @@ namespace fp32 {
class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase {
public:
DepthwiseDeconv2dK3x3S1(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({1, 1},
{1, 1},
paddings,
padding_type,
framework_type) {}
explicit DepthwiseDeconv2dK3x3S1(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~DepthwiseDeconv2dK3x3S1() {}
MaceStatus Compute(
......@@ -52,14 +48,9 @@ class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase {
class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase {
public:
DepthwiseDeconv2dK3x3S2(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({2, 2},
{1, 1},
paddings,
padding_type,
framework_type) {}
explicit DepthwiseDeconv2dK3x3S2(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~DepthwiseDeconv2dK3x3S2() {}
MaceStatus Compute(
......@@ -72,16 +63,9 @@ class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase {
class GroupDeconv2dK3x3S1 : public Deconv2dBase {
public:
GroupDeconv2dK3x3S1(const std::vector<int> &paddings,
const Padding padding_type,
const int group,
const FrameworkType framework_type)
: Deconv2dBase({1, 1},
{1, 1},
paddings,
padding_type,
group,
framework_type) {}
explicit GroupDeconv2dK3x3S1(
const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~GroupDeconv2dK3x3S1() {}
MaceStatus Compute(
......@@ -94,16 +78,8 @@ class GroupDeconv2dK3x3S1 : public Deconv2dBase {
class GroupDeconv2dK3x3S2 : public Deconv2dBase {
public:
GroupDeconv2dK3x3S2(const std::vector<int> &paddings,
const Padding padding_type,
const int group,
const FrameworkType framework_type)
: Deconv2dBase({2, 2},
{1, 1},
paddings,
padding_type,
group,
framework_type) {}
explicit GroupDeconv2dK3x3S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~GroupDeconv2dK3x3S2() {}
MaceStatus Compute(
......
......@@ -959,6 +959,20 @@ MaceStatus GroupDeconv2dK4x4S2::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S1, delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S1))
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S2, delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, K4x4S2))
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S1, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S1))
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S2, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, K4x4S2))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,13 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,14 +33,9 @@ namespace fp32 {
class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase {
public:
DepthwiseDeconv2dK4x4S1(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({1, 1},
{1, 1},
paddings,
padding_type,
framework_type) {}
explicit DepthwiseDeconv2dK4x4S1(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~DepthwiseDeconv2dK4x4S1() {}
MaceStatus Compute(
......@@ -52,14 +48,9 @@ class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase {
class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase {
public:
DepthwiseDeconv2dK4x4S2(const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase({2, 2},
{1, 1},
paddings,
padding_type,
framework_type) {}
explicit DepthwiseDeconv2dK4x4S2(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~DepthwiseDeconv2dK4x4S2() {}
MaceStatus Compute(
......@@ -72,16 +63,8 @@ class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase {
class GroupDeconv2dK4x4S1 : public Deconv2dBase {
public:
GroupDeconv2dK4x4S1(const std::vector<int> &paddings,
const Padding padding_type,
const int group,
const FrameworkType framework_type)
: Deconv2dBase({1, 1},
{1, 1},
paddings,
padding_type,
group,
framework_type) {}
explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~GroupDeconv2dK4x4S1() {}
MaceStatus Compute(
......@@ -94,16 +77,8 @@ class GroupDeconv2dK4x4S1 : public Deconv2dBase {
class GroupDeconv2dK4x4S2 : public Deconv2dBase {
public:
GroupDeconv2dK4x4S2(const std::vector<int> &paddings,
const Padding padding_type,
const int group,
const FrameworkType framework_type)
: Deconv2dBase({2, 2},
{1, 1},
paddings,
padding_type,
group,
framework_type) {}
explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~GroupDeconv2dK4x4S2() {}
MaceStatus Compute(
......
......@@ -207,6 +207,14 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dGeneral, delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, CPU, float, NEON, General))
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dGeneral, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, CPU, float, NEON, General))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -18,12 +18,13 @@
#include <vector>
#include <memory>
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
......@@ -32,16 +33,9 @@ namespace fp32 {
class DepthwiseDeconv2dGeneral : public Deconv2dBase {
public:
DepthwiseDeconv2dGeneral(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type)
: Deconv2dBase(strides,
dilations,
paddings,
padding_type,
framework_type) {}
explicit DepthwiseDeconv2dGeneral(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~DepthwiseDeconv2dGeneral() {}
MaceStatus Compute(
......@@ -54,18 +48,8 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase {
class GroupDeconv2dGeneral : public Deconv2dBase {
public:
GroupDeconv2dGeneral(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const int group,
const FrameworkType framework_type)
: Deconv2dBase(strides,
dilations,
paddings,
padding_type,
group,
framework_type) {}
explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~GroupDeconv2dGeneral() {}
MaceStatus Compute(
......
......@@ -1224,6 +1224,9 @@ MaceStatus Gemm::Compute(const OpContext *context,
output);
}
MACE_REGISTER_DELEGATOR(registry, Gemm, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, CPU, float, NEON))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -15,10 +15,11 @@
#ifndef MACE_OPS_ARM_FP32_GEMM_H_
#define MACE_OPS_ARM_FP32_GEMM_H_
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/common/matrix.h"
#include "mace/ops/delegator/gemm.h"
#include "mace/public/mace.h"
#include "mace/utils/math.h"
// This implements matrix-matrix multiplication.
......@@ -29,13 +30,12 @@ namespace ops {
namespace arm {
namespace fp32 {
class Gemm {
class Gemm : public delegator::Gemm {
public:
explicit Gemm(const bool should_cache_pack)
: pack_cache_(GetCPUAllocator()),
should_cache_pack_(should_cache_pack),
explicit Gemm(const delegator::GemmParam &param)
: delegator::Gemm(param), pack_cache_(GetCPUAllocator()),
should_cache_pack_(param.should_cache_pack_),
cached_(0) {}
Gemm() : Gemm(false) {}
~Gemm() {}
MaceStatus Compute(
......@@ -51,7 +51,7 @@ class Gemm {
const MatrixMajor output_major,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output);
Tensor *output) override;
// Original matrix before transpose has row-major
MaceStatus Compute(
......@@ -68,7 +68,7 @@ class Gemm {
const bool transpose_out,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output);
Tensor *output) override;
private:
void ComputeBlock(const float *packed_lhs_data,
......
......@@ -378,6 +378,10 @@ MaceStatus Gemv::Compute(const OpContext *context,
#undef vaddvq_f32
#endif
MACE_REGISTER_DELEGATOR(registry, Gemv, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, CPU, float, NEON))
} // namespace fp32
} // namespace arm
} // namespace ops
......
......@@ -15,18 +15,19 @@
#ifndef MACE_OPS_ARM_FP32_GEMV_H_
#define MACE_OPS_ARM_FP32_GEMV_H_
#include "mace/public/mace.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/delegator/gemv.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Gemv {
class Gemv : public delegator::Gemv {
public:
Gemv() {}
explicit Gemv(const DelegatorParam &param) : delegator::Gemv(param) {}
~Gemv() {}
// Always row-major after transpose
MaceStatus Compute(
......@@ -39,7 +40,7 @@ class Gemv {
const index_t lhs_width,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output);
Tensor *output) override;
};
} // namespace fp32
......
......@@ -12,12 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/q8/eltwise.h"
#include <arm_neon.h>
#include <algorithm>
#include "mace/ops/common/gemmlowp_util.h"
#include "mace/ops/delegator/eltwise.h"
#include "mace/utils/logging.h"
namespace mace {
......@@ -25,6 +24,16 @@ namespace ops {
namespace arm {
namespace q8 {
class Eltwise : public delegator::Eltwise {
public:
explicit Eltwise(const delegator::EltwiseParam &param)
: delegator::Eltwise(param) {}
~Eltwise() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input0,
const Tensor *input1, Tensor *output) override;
};
MaceStatus Eltwise::Compute(const OpContext *context,
const Tensor *input0,
const Tensor *input1,
......@@ -144,7 +153,7 @@ MaceStatus Eltwise::Compute(const OpContext *context,
gemmlowp::SaturatingRoundingDoublingHighMul(
res, output_multiplier),
-output_shift) +
output->zero_point();
output->zero_point();
output_ptr[i] = Saturate<uint8_t>(output_val);
}
},
......@@ -153,6 +162,9 @@ MaceStatus Eltwise::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MACE_REGISTER_DELEGATOR(registry, Eltwise, delegator::EltwiseParam,
MACE_DELEGATOR_KEY(Eltwise, CPU, uint8_t, NEON))
} // namespace q8
} // namespace arm
} // namespace ops
......
......@@ -181,6 +181,14 @@ class Gemv<uint8_t>;
template
class Gemv<int32_t>;
typedef Gemv<uint8_t> GemvUint8;
MACE_REGISTER_DELEGATOR(registry, GemvUint8, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, CPU, uint8_t, NEON))
typedef Gemv<int32_t> GemvInt32;
MACE_REGISTER_DELEGATOR(registry, GemvInt32, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, CPU, int32_t, NEON))
} // namespace q8
} // namespace arm
} // namespace ops
......
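
The typedefs above let the `Gemv` template instantiations be passed to `MACE_REGISTER_DELEGATOR` as plain class names. A hypothetical further instantiation would follow the same shape and would also need its own explicit `template class Gemv<...>;` line:

```c++
// Hypothetical third output type, shown only to illustrate the pattern;
// an int16_t Gemv does not exist in this commit.
typedef Gemv<int16_t> GemvInt16;
MACE_REGISTER_DELEGATOR(registry, GemvInt16, DelegatorParam,
                        MACE_DELEGATOR_KEY(Gemv, CPU, int16_t, NEON))
```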
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,15 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// This implements matrix-vector multiplication described as
// https://github.com/google/gemmlowp/blob/master/todo/fast-gemv.txt
#ifndef MACE_OPS_ARM_Q8_GEMV_H_
#define MACE_OPS_ARM_Q8_GEMV_H_
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/op_context.h"
#include "mace/ops/delegator/gemv.h"
namespace mace {
namespace ops {
......@@ -28,11 +23,11 @@ namespace arm {
namespace q8 {
template<typename OUTPUT_TYPE>
class Gemv {
class Gemv : public delegator::Gemv {
public:
Gemv() : is_output_type_uint8_(
DataTypeToEnum<OUTPUT_TYPE>::value == DataType::DT_UINT8) {
}
explicit Gemv(const DelegatorParam &param)
: delegator::Gemv(param), is_output_type_uint8_(
DataTypeToEnum<OUTPUT_TYPE>::value == DataType::DT_UINT8) {}
~Gemv() {}
// Always row-major after transpose
MaceStatus Compute(
......@@ -45,7 +40,7 @@ class Gemv {
const index_t lhs_width,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output);
Tensor *output) override;
private:
bool is_output_type_uint8_;
......
......@@ -17,7 +17,8 @@
#include <algorithm>
#include <limits>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/tensor.h"
#include "mace/core/quantize.h"
......@@ -106,12 +107,12 @@ class DequantizeOp<DeviceType::CPU, T> : public Operation {
QuantizeUtil<float, T> quantize_util_;
};
void RegisterQuantize(OpRegistryBase *op_registry) {
void RegisterQuantize(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Quantize", QuantizeOp,
DeviceType::CPU, uint8_t);
}
void RegisterDequantize(OpRegistryBase *op_registry) {
void RegisterDequantize(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp,
DeviceType::CPU, uint8_t);
MACE_REGISTER_OP(op_registry, "Dequantize", DequantizeOp,
......
......@@ -16,14 +16,10 @@
#include <string>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/ops/activation.h"
#if defined(MACE_ENABLE_NEON)
#include "mace/ops/arm/fp32/activation.h"
#else
#include "mace/ops/ref/activation.h"
#endif
#include "mace/ops/delegator/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
......@@ -45,11 +41,16 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
epsilon_(Operation::GetOptionalArg<float>("epsilon",
static_cast<float>(1e-4))),
activation_delegator_(
ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP")),
Operation::GetOptionalArg<float>("max_limit", 0.0f),
Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
delegator::Activation::Create(
context->workspace(),
MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE),
delegator::ActivationParam(
ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP")),
Operation::GetOptionalArg<float>("max_limit", 0.0f),
Operation::GetOptionalArg<float>("leakyrelu_coefficient",
0.0f)))) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -142,18 +143,14 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
}, 0, batch, 1, 0, channels, 1);
}
activation_delegator_.Compute(context, output, output);
activation_delegator_->Compute(context, output, output);
return MaceStatus::MACE_SUCCESS;
}
private:
float epsilon_;
#ifdef MACE_ENABLE_NEON
arm::fp32::Activation activation_delegator_;
#else
ref::Activation activation_delegator_;
#endif // MACE_ENABLE_NEON
std::unique_ptr<delegator::Activation> activation_delegator_;
protected:
MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
......@@ -232,7 +229,7 @@ class BatchNormOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterBatchNorm(OpRegistryBase *op_registry) {
void RegisterBatchNorm(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "BatchNorm", BatchNormOp,
DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "BatchNorm", BatchNormOp);
......
......@@ -15,7 +15,8 @@
#include <algorithm>
#include <memory>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/batch_to_space.h"
#endif // MACE_ENABLE_OPENCL
......@@ -285,7 +286,7 @@ class BatchToSpaceNDOp<DeviceType::GPU, float> : public BatchToSpaceOpBase {
};
#endif // MACE_ENABLE_OPENCL
void RegisterBatchToSpaceND(OpRegistryBase *op_registry) {
void RegisterBatchToSpaceND(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "BatchToSpaceND",
BatchToSpaceNDOp, DeviceType::CPU, float);
......
......@@ -16,14 +16,10 @@
#include <memory>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/ops/activation.h"
#ifdef MACE_ENABLE_NEON
#include "mace/ops/arm/fp32/bias_add.h"
#else
#include "mace/ops/ref/bias_add.h"
#endif // MACE_ENABLE_NEON
#include "mace/ops/delegator/bias_add.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
......@@ -42,8 +38,11 @@ class BiasAddOp<DeviceType::CPU, float> : public Operation {
public:
explicit BiasAddOp(OpConstructContext *context)
: Operation(context),
has_data_format_(Operation::GetOptionalArg<int>("has_data_format",
0)) {}
has_data_format_(Operation::GetOptionalArg<int>("has_data_format", 0)),
bias_add_delegator_(delegator::BiasAdd::Create(
context->workspace(),
MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE),
DelegatorParam())) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -56,7 +55,7 @@ class BiasAddOp<DeviceType::CPU, float> : public Operation {
MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2,
"bias must be 1-dimensional or n*c for caffee.",
MakeString(bias->shape()));
bias_add_delegator_.Compute(context, input, bias, output);
bias_add_delegator_->Compute(context, input, bias, output);
} else { // NHWC
MACE_CHECK(bias->dim_size() == 1 || bias->dim_size() == 2,
"bias must be 1 or 2 dimensionals for caffee.",
......@@ -115,11 +114,7 @@ class BiasAddOp<DeviceType::CPU, float> : public Operation {
private:
int has_data_format_;
#ifdef MACE_ENABLE_NEON
arm::fp32::BiasAdd bias_add_delegator_;
#else
ref::BiasAdd bias_add_delegator_;
#endif // MACE_ENABLE_NEON
std::unique_ptr<delegator::BiasAdd> bias_add_delegator_;
};
#ifdef MACE_ENABLE_OPENCL
......@@ -164,7 +159,7 @@ class BiasAddOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterBiasAdd(OpRegistryBase *op_registry) {
void RegisterBiasAdd(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "BiasAdd", BiasAddOp,
DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "BiasAdd", BiasAddOp);
......
......@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#include <arm_neon.h>
......@@ -54,7 +55,7 @@ class CastOp : public Operation {
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
void RegisterCast(OpRegistryBase *op_registry) {
void RegisterCast(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Cast", CastOp,
DeviceType::CPU, float);
MACE_REGISTER_OP(op_registry, "Cast", CastOp,
......
......@@ -14,7 +14,8 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/channel_shuffle.h"
#endif // MACE_ENABLE_OPENCL
......@@ -98,7 +99,7 @@ class ChannelShuffleOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterChannelShuffle(OpRegistryBase *op_registry) {
void RegisterChannelShuffle(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "ChannelShuffle",
ChannelShuffleOp, DeviceType::CPU, float);
......
......@@ -15,8 +15,8 @@
#ifndef MACE_OPS_COMMON_LSTM_H_
#define MACE_OPS_COMMON_LSTM_H_
#include "mace/core/ops/op_context.h"
#include "mace/core/types.h"
#include "mace/core/op_context.h"
namespace mace {
namespace ops {
......
......@@ -20,7 +20,7 @@
#endif // MACE_ENABLE_NEON
#include <algorithm>
#include <vector>
#include "mace/core/op_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/public/mace.h"
namespace mace {
......
......@@ -14,7 +14,8 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/quantize.h"
#include "mace/utils/memory.h"
......@@ -221,7 +222,7 @@ class ConcatOp<DeviceType::GPU, float> : public ConcatOpBase {
};
#endif // MACE_ENABLE_OPENCL
void RegisterConcat(OpRegistryBase *op_registry) {
void RegisterConcat(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, float);
......
......@@ -24,32 +24,18 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/activation.h"
#include "mace/ops/delegator/bias_add.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
#ifdef MACE_ENABLE_NEON
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/fp32/conv_2d_1x1.h"
#include "mace/ops/arm/fp32/conv_2d_3x3.h"
#include "mace/ops/arm/fp32/conv_2d_3x3_winograd.h"
#include "mace/ops/arm/fp32/conv_2d_5x5.h"
#include "mace/ops/arm/fp32/conv_2d_7x7.h"
#include "mace/ops/arm/fp32/conv_2d_1xn.h"
#include "mace/ops/arm/fp32/conv_general.h"
#include "mace/ops/arm/fp32/bias_add.h"
#include "mace/ops/arm/fp32/activation.h"
#else
#include "mace/ops/ref/activation.h"
#include "mace/ops/ref/bias_add.h"
#endif // MACE_ENABLE_NEON
#include "mace/ops/ref/conv_2d.h"
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/common/gemmlowp_util.h"
#include "mace/ops/arm/q8/quantization_util.h"
......@@ -72,13 +58,21 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
public:
explicit Conv2dOp(OpConstructContext *context)
: ConvPool2dOpBase(context),
activation_delegator_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP")),
Operation::GetOptionalArg<float>("max_limit",
0.0f),
Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
activation_delegator_(
delegator::Activation::Create(
context->workspace(),
MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE),
delegator::ActivationParam(
ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP")),
Operation::GetOptionalArg<float>("max_limit", 0.0f),
Operation::GetOptionalArg<float>("leakyrelu_coefficient",
0.0f)))),
bias_add_delegator_(delegator::BiasAdd::Create(
context->workspace(),
MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE),
DelegatorParam())) {}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(INPUT);
......@@ -86,116 +80,100 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
const Tensor *bias = this->InputSize() >= 3 ? this->Input(BIAS) : nullptr;
Tensor *output = this->Output(OUTPUT);
#ifdef MACE_ENABLE_NEON
// the following params are used to decide which conv delegator to use
const index_t stride_h = strides_[0];
const index_t stride_w = strides_[1];
const index_t dilation_h = dilations_[0];
const index_t dilation_w = dilations_[1];
const index_t filter_h = filter->dim(2);
const index_t filter_w = filter->dim(3);
const index_t input_channels = input->dim(1);
const index_t channels = filter->dim(0);
// NOTE: delegator is fixed after first round of running,
// although winograd depends on input params.
// We do not support changeable filter for now.
if (conv2d_delegator_ == nullptr) {
if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1
&& dilation_h == 1 && dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK1x1>(
paddings_, padding_type_);
} else if (filter_h == 3 && filter_w == 3
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
if (input_channels >= 8 && channels >= 8) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK3x3Winograd>(
paddings_, padding_type_);
} else {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK3x3S1>(
paddings_, padding_type_);
std::string tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, General);
if (MACE_CPU_IMPL_TYPE == NEON) {
// the following params are used to decide which conv delegator to use
const index_t stride_h = strides_[0];
const index_t stride_w = strides_[1];
const index_t dilation_h = dilations_[0];
const index_t dilation_w = dilations_[1];
const index_t filter_h = filter->dim(2);
const index_t filter_w = filter->dim(3);
const index_t input_channels = input->dim(1);
const index_t channels = filter->dim(0);
// NOTE: delegator is fixed after first round of running,
// although winograd depends on input params.
// We do not support changeable filter for now.
if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1
&& dilation_h == 1 && dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K1x1);
} else if (filter_h == 3 && filter_w == 3
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
if (input_channels >= 8 && channels >= 8) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K3x3Winograd);
} else {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K3x3S1);
}
} else if (filter_h == 3 && filter_w == 3
&& stride_h == 2 && stride_w == 2 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K3x3S2);
} else if (filter_h == 5 && filter_w == 5
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K5x5S1);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K7x7S1);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 2 && stride_w == 2 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K7x7S2);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 3 && stride_w == 3 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K7x7S3);
} else if (filter_h == 1 && filter_w == 7
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K1x7S1);
} else if (filter_h == 7 && filter_w == 1
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K7x1S1);
} else if (filter_h == 1 && filter_w == 15
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K1x15S1);
} else if (filter_h == 15 && filter_w == 1
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
tag = MACE_DELEGATOR_KEY_EX(Conv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K15x1S1);
}
} else if (filter_h == 3 && filter_w == 3
&& stride_h == 2 && stride_w == 2 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK3x3S2>(
paddings_, padding_type_);
} else if (filter_h == 5 && filter_w == 5
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK5x5S1>(
paddings_, padding_type_);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK7x7S1>(
paddings_, padding_type_);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 2 && stride_w == 2 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK7x7S2>(
paddings_, padding_type_);
} else if (filter_h == 7 && filter_w == 7
&& stride_h == 3 && stride_w == 3 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK7x7S3>(
paddings_, padding_type_);
} else if (filter_h == 1 && filter_w == 7
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK1x7S1>(
paddings_, padding_type_);
} else if (filter_h == 7 && filter_w == 1
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK7x1S1>(
paddings_, padding_type_);
} else if (filter_h == 1 && filter_w == 15
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK1x15S1>(
paddings_, padding_type_);
} else if (filter_h == 15 && filter_w == 1
&& stride_h == 1 && stride_w == 1 && dilation_h == 1
&& dilation_w == 1) {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dK15x1S1>(
paddings_, padding_type_);
} else {
conv2d_delegator_ = make_unique<arm::fp32::Conv2dGeneral>(
strides_,
dilations_,
paddings_,
padding_type_);
}
delegator::Conv2dParam param(strides_, dilations_,
paddings_, padding_type_);
conv2d_delegator_ = delegator::Conv2d::Create(context->workspace(),
tag, param);
}
conv2d_delegator_->Compute(context, input, filter, output);
#else
if (ref_conv2d_delegator_ == nullptr) {
ref_conv2d_delegator_ = make_unique<ref::Conv2d<float>>(strides_,
dilations_,
paddings_,
padding_type_);
}
ref_conv2d_delegator_->Compute(context, input, filter, output);
#endif
bias_add_delegator_.Compute(context, output, bias, output);
activation_delegator_.Compute(context, output, output);
bias_add_delegator_->Compute(context, output, bias, output);
activation_delegator_->Compute(context, output, output);
return MaceStatus::MACE_SUCCESS;
}
private:
#ifdef MACE_ENABLE_NEON
std::unique_ptr<arm::fp32::Conv2dBase> conv2d_delegator_;
arm::fp32::BiasAdd bias_add_delegator_;
arm::fp32::Activation activation_delegator_;
#else
std::unique_ptr<ref::Conv2d<float>> ref_conv2d_delegator_;
ref::BiasAdd bias_add_delegator_;
ref::Activation activation_delegator_;
#endif // MACE_ENABLE_NEON
std::unique_ptr<delegator::Activation> activation_delegator_;
std::unique_ptr<delegator::BiasAdd> bias_add_delegator_;
std::unique_ptr<delegator::Conv2d> conv2d_delegator_;
private:
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
......@@ -518,7 +496,7 @@ class Conv2dOp<DeviceType::GPU, float> : public ConvPool2dOpBase {
};
#endif // MACE_ENABLE_OPENCL
void RegisterConv2D(OpRegistryBase *op_registry) {
void RegisterConv2D(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Conv2D", Conv2dOp,
DeviceType::CPU, float);
......
......@@ -17,7 +17,7 @@
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/ops/common/conv_pool_2d_util.h"
namespace mace {
......
......@@ -14,7 +14,8 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/utils/math.h"
#include "mace/utils/memory.h"
#ifdef MACE_ENABLE_OPENCL
......@@ -132,7 +133,7 @@ class CropOp<DeviceType::GPU, float> : public Operation {
};
#endif // MACE_ENABLE_OPENCL
void RegisterCrop(OpRegistryBase *op_registry) {
void RegisterCrop(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Crop", CropOp,
DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "Crop", CropOp);
......
......@@ -14,7 +14,8 @@
#include <functional>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/registry/ops_registry.h"
namespace mace {
namespace ops {
......@@ -141,7 +142,7 @@ class CumsumOp<DeviceType::CPU, T> : public Operation {
bool checked_;
};
void RegisterCumsum(OpRegistryBase *op_registry) {
void RegisterCumsum(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Cumsum", CumsumOp,
DeviceType::CPU, float);
}
......
......@@ -14,20 +14,6 @@
#include "mace/ops/deconv_2d.h"
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
#include "mace/ops/arm/fp32/deconv_2d_2x2.h"
#include "mace/ops/arm/fp32/deconv_2d_3x3.h"
#include "mace/ops/arm/fp32/deconv_2d_4x4.h"
#include "mace/ops/arm/fp32/deconv_2d_general.h"
#include "mace/ops/arm/fp32/bias_add.h"
#include "mace/ops/arm/fp32/activation.h"
#else
#include "mace/ops/ref/bias_add.h"
#include "mace/ops/ref/activation.h"
#include "mace/ops/ref/deconv_2d.h"
#endif
#include <algorithm>
#include <functional>
#include <memory>
......@@ -35,9 +21,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/activation.h"
#include "mace/ops/delegator/bias_add.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
......@@ -49,6 +39,10 @@
namespace mace {
namespace ops {
namespace {
const std::vector<int> kDeconv2dStrides = {1, 1};
}
template<DeviceType D, class T>
class Deconv2dOp;
......@@ -57,9 +51,16 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
public:
explicit Deconv2dOp(OpConstructContext *context)
: Deconv2dOpBase(context),
activation_delegator_(activation_,
relux_max_limit_,
leakyrelu_coefficient_) {}
activation_delegator_(
delegator::Activation::Create(
context->workspace(),
MACE_DELEGATOR_KEY(Activation, CPU, float, MACE_CPU_IMPL_TYPE),
delegator::ActivationParam(activation_, relux_max_limit_,
leakyrelu_coefficient_))),
bias_add_delegator_(delegator::BiasAdd::Create(
context->workspace(),
MACE_DELEGATOR_KEY(BiasAdd, CPU, float, MACE_CPU_IMPL_TYPE),
DelegatorParam())) {}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
......@@ -79,91 +80,67 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
MACE_CHECK_NOTNULL(filter);
MACE_CHECK_NOTNULL(output);
#ifdef MACE_ENABLE_NEON
const index_t kernel_h = filter->dim(2);
const index_t kernel_w = filter->dim(3);
bool use_neon_2x2_s1 = kernel_h == kernel_w && kernel_h == 2 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 &&
strides_[0] == strides_[1] && strides_[0] == 2;
if (deconv2d_delegator_ == nullptr) {
std::string tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, General);
if (MACE_CPU_IMPL_TYPE == NEON) {
const index_t kernel_h = filter->dim(2);
const index_t kernel_w = filter->dim(3);
bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 &&
strides_[0] == strides_[1] && strides_[0] == 2;
bool use_neon_2x2_s1 = kernel_h == kernel_w && kernel_h == 2 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_2x2_s2 = kernel_h == kernel_w && kernel_h == 2 &&
strides_[0] == strides_[1] && strides_[0] == 2;
bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 &&
strides_[0] == strides_[1] && strides_[0] == 2;
bool use_neon_3x3_s1 = kernel_h == kernel_w && kernel_h == 3 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_3x3_s2 = kernel_h == kernel_w && kernel_h == 3 &&
strides_[0] == strides_[1] && strides_[0] == 2;
if (deconv2d_delegator_ == nullptr) {
if (use_neon_2x2_s1) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK2x2S1>(
paddings_, padding_type_, model_type_);
} else if (use_neon_2x2_s2) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK2x2S2>(
paddings_, padding_type_, model_type_);
} else if (use_neon_3x3_s1) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK3x3S1>(
paddings_, padding_type_, model_type_);
} else if (use_neon_3x3_s2) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK3x3S2>(
paddings_, padding_type_, model_type_);
} else if (use_neon_4x4_s1) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK4x4S1>(
paddings_, padding_type_, model_type_);
} else if (use_neon_4x4_s2) {
deconv2d_delegator_ = make_unique<arm::fp32::Deconv2dK4x4S2>(
paddings_, padding_type_, model_type_);
} else {
deconv2d_delegator_ =
make_unique<arm::fp32::Deconv2dGeneral>(strides_,
std::vector<int>{1, 1},
paddings_,
padding_type_,
model_type_);
bool use_neon_4x4_s1 = kernel_h == kernel_w && kernel_h == 4 &&
strides_[0] == strides_[1] && strides_[0] == 1;
bool use_neon_4x4_s2 = kernel_h == kernel_w && kernel_h == 4 &&
strides_[0] == strides_[1] && strides_[0] == 2;
if (use_neon_2x2_s1) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K2x2S1);
} else if (use_neon_2x2_s2) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K2x2S2);
} else if (use_neon_3x3_s1) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K3x3S1);
} else if (use_neon_3x3_s2) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K3x3S2);
} else if (use_neon_4x4_s1) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K4x4S1);
} else if (use_neon_4x4_s2) {
tag = MACE_DELEGATOR_KEY_EX(Deconv2d, CPU, float,
MACE_CPU_IMPL_TYPE, K4x4S2);
}
}
delegator::Deconv2dParam param(strides_, kDeconv2dStrides, paddings_,
padding_type_, model_type_);
deconv2d_delegator_ = delegator::Deconv2d::Create(context->workspace(),
tag, param);
}
deconv2d_delegator_->Compute(context,
input,
filter,
output_shape_tensor,
output);
#else
if (deconv2d_delegator_ == nullptr) {
deconv2d_delegator_ = make_unique<ref::Deconv2d<float>>(strides_,
std::vector<int>{
1, 1},
paddings_,
padding_type_,
model_type_);
}
deconv2d_delegator_->Compute(context,
input,
filter,
output_shape_tensor,
output);
#endif // MACE_ENABLE_NEON
bias_add_delegator_.Compute(context, output, bias, output);
activation_delegator_.Compute(context, output, output);
deconv2d_delegator_->Compute(context, input, filter,
output_shape_tensor, output);
bias_add_delegator_->Compute(context, output, bias, output);
activation_delegator_->Compute(context, output, output);
return MaceStatus::MACE_SUCCESS;
}
private:
#ifdef MACE_ENABLE_NEON
std::unique_ptr<arm::fp32::Deconv2dBase> deconv2d_delegator_;
arm::fp32::BiasAdd bias_add_delegator_;
arm::fp32::Activation activation_delegator_;
#else
ref::BiasAdd bias_add_delegator_;
ref::Activation activation_delegator_;
std::unique_ptr<ref::Deconv2d<float>> deconv2d_delegator_;
#endif // MACE_ENABLE_NEON
std::unique_ptr<delegator::Activation> activation_delegator_;
std::unique_ptr<delegator::BiasAdd> bias_add_delegator_;
std::unique_ptr<delegator::Deconv2d> deconv2d_delegator_;
};
#ifdef MACE_ENABLE_OPENCL
......@@ -258,7 +235,7 @@ class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
};
#endif // MACE_ENABLE_OPENCL
void RegisterDeconv2D(OpRegistryBase *op_registry) {
void RegisterDeconv2D(OpRegistry *op_registry) {
MACE_REGISTER_OP(op_registry, "Deconv2D", Deconv2dOp,
DeviceType::CPU, float);
MACE_REGISTER_GPU_OP(op_registry, "Deconv2D", Deconv2dOp);
......
......@@ -19,7 +19,7 @@
#include <string>
#include <vector>
#include "mace/core/operator.h"
#include "mace/core/ops/operator.h"
#include "mace/core/types.h"
#include "mace/ops/activation.h"
#include "mace/ops/common/conv_pool_2d_util.h"
......
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,42 +12,50 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_ACTIVATION_H_
#define MACE_OPS_ARM_FP32_ACTIVATION_H_
#ifndef MACE_OPS_DELEGATOR_ACTIVATION_H_
#define MACE_OPS_DELEGATOR_ACTIVATION_H_
#include "mace/core/op_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"
#include "mace/ops/common/activation_type.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
namespace delegator {
class Activation {
struct ActivationParam : public DelegatorParam {
explicit ActivationParam(ActivationType type, const float limit,
const float leakyrelu_coefficient)
: type_(type), limit_(limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
ActivationType type_;
const float limit_;
const float leakyrelu_coefficient_;
};
class Activation : public OpDelegator {
public:
explicit Activation(ActivationType type,
const float limit,
const float leakyrelu_coefficient);
~Activation() = default;
explicit Activation(const ActivationParam &param)
: OpDelegator(param), type_(param.type_), limit_(param.limit_),
leakyrelu_coefficient_(param.leakyrelu_coefficient_) {}
virtual ~Activation() = default;
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
Tensor *output);
MACE_DEFINE_DELEGATOR_CREATOR(Activation)
private:
void DoActivation(const OpContext *context,
const Tensor *input,
Tensor *output);
virtual MaceStatus Compute(const OpContext *context,
const Tensor *input,
Tensor *output) = 0;
protected:
ActivationType type_;
const float limit_;
const float leakyrelu_coefficient_;
};
} // namespace fp32
} // namespace arm
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_ACTIVATION_H_
#endif // MACE_OPS_DELEGATOR_ACTIVATION_H_
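For orientation, a minimal sketch (not part of this commit) of what a backend supplies against this interface: a class derived from `delegator::Activation` that implements the pure-virtual `Compute`, registered under a delegator key in the same way the q8 `Eltwise` and `Gemv` delegators are registered earlier in this diff. The `my_backend` namespace, the empty `Compute` body, and the reuse of the `NEON` tag are illustrative assumptions; a real implementation would pick a key that uniquely identifies its backend.

```c++
#include "mace/ops/delegator/activation.h"

namespace mace {
namespace ops {
namespace my_backend {  // hypothetical backend namespace, for illustration only

class Activation : public delegator::Activation {
 public:
  explicit Activation(const delegator::ActivationParam &param)
      : delegator::Activation(param) {}
  ~Activation() = default;

  MaceStatus Compute(const OpContext *context, const Tensor *input,
                     Tensor *output) override {
    (void)context;  // unused in this sketch
    // A real backend applies type_/limit_/leakyrelu_coefficient_ element-wise
    // to input and writes the result into output; omitted here.
    (void)input;
    (void)output;
    return MaceStatus::MACE_SUCCESS;
  }
};

// Registration mirrors the q8 Eltwise/Gemv pattern shown above.
MACE_REGISTER_DELEGATOR(registry, Activation, delegator::ActivationParam,
                        MACE_DELEGATOR_KEY(Activation, CPU, float, NEON))

}  // namespace my_backend
}  // namespace ops
}  // namespace mace
```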
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,37 +12,32 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_BIAS_ADD_H_
#define MACE_OPS_ARM_FP32_BIAS_ADD_H_
#ifndef MACE_OPS_DELEGATOR_BIAS_ADD_H_
#define MACE_OPS_DELEGATOR_BIAS_ADD_H_
#include "mace/core/op_context.h"
#include "mace/core/ops/op_context.h"
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
namespace delegator {
class BiasAdd {
class BiasAdd : public OpDelegator {
public:
BiasAdd() = default;
~BiasAdd() = default;
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output);
private:
void AddBias(const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output);
explicit BiasAdd(const DelegatorParam &param) : OpDelegator(param) {}
virtual ~BiasAdd() = default;
MACE_DEFINE_DELEGATOR_CREATOR(BiasAdd)
virtual MaceStatus Compute(const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output) = 0;
};
} // namespace fp32
} // namespace arm
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_BIAS_ADD_H_
#endif // MACE_OPS_DELEGATOR_BIAS_ADD_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_DELEGATOR_CONV_2D_H_
#define MACE_OPS_DELEGATOR_CONV_2D_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"
#include "mace/ops/common/conv_pool_2d_util.h"
namespace mace {
namespace ops {
enum ConvType {
General,
K1x1,
K1x7S1,
K7x1S1,
K1x15S1,
K15x1S1,
K3x3S1,
K3x3S2,
K3x3Winograd,
K5x5S1,
K7x7S1,
K7x7S2,
K7x7S3,
};
namespace delegator {
struct Conv2dParam : public DelegatorParam {
explicit Conv2dParam(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type)
: strides_(strides), dilations_(dilations),
paddings_(paddings), padding_type_(padding_type) {}
const std::vector<int> &strides_;
const std::vector<int> &dilations_;
const std::vector<int> &paddings_;
const Padding padding_type_;
};
class Conv2d : public OpDelegator {
public:
explicit Conv2d(const delegator::Conv2dParam &param)
: OpDelegator(param),
strides_(param.strides_),
dilations_(param.dilations_),
paddings_(param.paddings_),
padding_type_(param.padding_type_) {}
virtual ~Conv2d() = default;
MACE_DEFINE_DELEGATOR_CREATOR(Conv2d)
virtual MaceStatus Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) = 0;
protected:
const std::vector<int> strides_;
const std::vector<int> dilations_;
const std::vector<int> paddings_;
const Padding padding_type_;
};
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_DELEGATOR_CONV_2D_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_DELEGATOR_DECONV_2D_H_
#define MACE_OPS_DELEGATOR_DECONV_2D_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/ops/op_delegator.h"
#include "mace/core/registry/op_delegator_registry.h"
namespace mace {
namespace ops {
enum DeconvType {
General,
K2x2S1,
K2x2S2,
K3x3S1,
K3x3S2,
K4x4S1,
K4x4S2,
};
namespace delegator {
struct Deconv2dParam : public DelegatorParam {
explicit Deconv2dParam(const std::vector<int> &strides,
const std::vector<int> &dilations,
const std::vector<int> &paddings,
const Padding padding_type,
const FrameworkType framework_type,
const int group = 1)
: strides_(strides), dilations_(dilations),
paddings_(paddings), padding_type_(padding_type),
framework_type_(framework_type),
group_(group) {}
const std::vector<int> &strides_;
const std::vector<int> &dilations_;
const std::vector<int> &paddings_;
const Padding padding_type_;
const FrameworkType framework_type_;
const int group_;
};
class Deconv2d : public OpDelegator {
public:
explicit Deconv2d(const Deconv2dParam &param)
: OpDelegator(param),
strides_(param.strides_),
dilations_(param.dilations_),
paddings_(param.paddings_),
padding_type_(param.padding_type_),
framework_type_(param.framework_type_),
group_(param.group_) {}
virtual ~Deconv2d() = default;
MACE_DEFINE_DELEGATOR_CREATOR(Deconv2d)
virtual MaceStatus Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) = 0;
protected:
const std::vector<int> strides_;
const std::vector<int> dilations_;
const std::vector<int> paddings_;
const Padding padding_type_;
const FrameworkType framework_type_;
const int group_;
};
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_DELEGATOR_DECONV_2D_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,35 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_REF_BIAS_ADD_H_
#define MACE_OPS_REF_BIAS_ADD_H_
#include "mace/core/op_context.h"
#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_
#define MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace ref {
class BiasAdd {
public:
BiasAdd() = default;
~BiasAdd() = default;
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output);
private:
void AddBias(const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output);
};
} // namespace ref
namespace delegator {
typedef Conv2dParam DepthwiseConv2dParam;
typedef Conv2d DepthwiseConv2d;
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_REF_BIAS_ADD_H_
#endif // MACE_OPS_DELEGATOR_DEPTHWISE_CONV_2D_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_
#define MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_
#include "mace/ops/delegator/deconv_2d.h"
namespace mace {
namespace ops {
namespace delegator {
typedef Deconv2dParam DepthwiseDeconv2dParam;
typedef Deconv2dParam GroupDeconv2dParam;
typedef Deconv2d DepthwiseDeconv2d;
typedef Deconv2d GroupDeconv2d;
} // namespace delegator
} // namespace ops
} // namespace mace
#endif // MACE_OPS_DELEGATOR_DEPTHWISE_DECONV_2D_H_
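As a usage note (a sketch, not code from this commit): ops obtain these aliased delegators through the same `Create`-by-key path used by `Deconv2dOp` earlier in this diff, with `Deconv2dParam`'s `group_` field carrying the group count. The helper name `MakeGroupDeconv2d`, the stride/padding values, the delegator key, and the `op_construct_context.h` include path are assumptions made for illustration.

```c++
#include <memory>
#include <vector>

#include "mace/core/ops/op_construct_context.h"  // assumed header location
#include "mace/ops/delegator/depthwise_deconv_2d.h"

namespace mace {
namespace ops {

// Hypothetical helper: create a group-deconvolution delegator for a CPU float
// op. The key and the parameter values below are illustrative only.
std::unique_ptr<delegator::GroupDeconv2d> MakeGroupDeconv2d(
    OpConstructContext *context,
    const FrameworkType framework_type,
    const int group) {
  const std::vector<int> strides{1, 1};
  const std::vector<int> dilations{1, 1};
  const std::vector<int> paddings{0, 0};
  delegator::GroupDeconv2dParam param(strides, dilations, paddings,
                                      Padding::VALID, framework_type, group);
  return delegator::GroupDeconv2d::Create(
      context->workspace(),
      MACE_DELEGATOR_KEY(GroupDeconv2d, CPU, float, MACE_CPU_IMPL_TYPE),
      param);
}

}  // namespace ops
}  // namespace mace
```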