Commit 607a1f48, authored by liuqi

Refactor: move all files from the kernels directory to ops and remove kernels.

1. Move all files from kernels to ops.
2. Unify the op_def_registry and op_registry.
3. Support op clipping: do not link an op's code when the op is not registered.
Parent d5679c53
...@@ -67,7 +67,6 @@ extra_tests: ...@@ -67,7 +67,6 @@ extra_tests:
stage: extra_tests stage: extra_tests
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
platform_compatible_tests: platform_compatible_tests:
......
...@@ -94,7 +94,6 @@ jobs: ...@@ -94,7 +94,6 @@ jobs:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- echo 'Extra Test' - echo 'Extra Test'
- python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1
env: TYPE=Extra-Test-ARMEABI-v7a env: TYPE=Extra-Test-ARMEABI-v7a
os: linux os: linux
...@@ -106,7 +105,6 @@ jobs: ...@@ -106,7 +105,6 @@ jobs:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=arm64-v8a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=arm64-v8a || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=arm64-v8a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=arm64-v8a || exit 1
- echo 'Extra Test on ARM64' - echo 'Extra Test on ARM64'
- python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=False --target_abis=arm64-v8a || exit 1
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=arm64-v8a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=arm64-v8a || exit 1
env: TYPE=Extra-Test-ARM64-v8a env: TYPE=Extra-Test-ARM64-v8a
os: linux os: linux
......
...@@ -5,46 +5,24 @@ You can create a custom op if it is not supported yet. ...@@ -5,46 +5,24 @@ You can create a custom op if it is not supported yet.
To add a custom op, you need to follow these steps: To add a custom op, you need to follow these steps:
Register the new OpDef information
----------------------------------
Register the OpDef information about which devices the operation could run on.
Registry file is in `mace/ops/ops_def_register.cc`
```c++
#include "mace/ops/ops_def_register.h"
namespace mace {
namespace ops {
void RegisterOpDefs(OpDefRegistryBase *op_def_registry) {
MACE_REGISTER_OP_DEF(
op_def_registry,
OpRegistrationBuilder("MyCustomOp")
.SetDevicePlaceFunc([]() -> std::vector<DeviceType> {
return {DeviceType::CPU, DeviceType::GPU};
}));
......
}
} // namespace ops
} // namespace mace
```
Implement the Operation Implement the Operation
----------------------- -----------------------
The best way is to refer to the implementation of another operator (e.g. `/mace/kernels/activation.cc`). The best way is to refer to the implementation of another operator (e.g. `/mace/ops/activation.cc`).
Define the new Op class in `mace/kernels/my_custom_op.cc`. Define the new Op class in `mace/ops/my_custom_op.cc`.
1. CPU code: just write the code in `mace/kernels/my_custom_op.cc`. 1. ARM kernels: the NEON kernel code is located in `mace/ops/arm/my_custom_op.cc`.
2. GPU code: the kernel API is defined in `mace/kernels/my_custom_op.h`, 2. GPU kernels: the OpenCL kernel API is defined in `mace/ops/opencl/my_custom_op.h` (see the sketch after this list),
The kernel based on Image is implemented in `mace/kernels/opencl/image/my_custom_op.cc`, * the kernel based on Image is implemented in `mace/ops/opencl/image/my_custom_op.cc`,
The kernel based on Buffer is implemented in `mace/kernels/opencl/buffer/my_custom_op.cc`. * the kernel based on Buffer is implemented in `mace/ops/opencl/buffer/my_custom_op.cc`.
* The OpenCL kernel file is implemented in `mace/ops/opencl/cl/my_custom_op.cl`.
* Add the path of the OpenCL kernel file to `mace/repository/opencl-kernel/opencl_kernel_configure.bzl`.
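The header `mace/ops/opencl/my_custom_op.h` itself is not shown in this commit, so the following is only a rough sketch of what such a kernel interface usually looks like, assuming a single-input, single-output op; the class and method names below are illustrative assumptions, not the actual API.
```c++
// Hypothetical sketch of mace/ops/opencl/my_custom_op.h (names are illustrative).
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"

namespace mace {
namespace ops {

// Common interface implemented by both the Image-based and Buffer-based kernels.
class OpenCLMyCustomOpKernel {
 public:
  virtual ~OpenCLMyCustomOpKernel() = default;
  // Runs the OpenCL kernel on the given input and writes the result to output.
  virtual MaceStatus Compute(OpContext *context,
                             const Tensor *input,
                             Tensor *output) = 0;
};

}  // namespace ops
}  // namespace mace
```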
The structure is like the following code. The structure of the Op is like the following code.
```c++ ```c++
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class MyCustomOp; class MyCustomOp;
...@@ -56,43 +34,34 @@ class MyCustomOp<DeviceType::CPU, float> : public Operation { ...@@ -56,43 +34,34 @@ class MyCustomOp<DeviceType::CPU, float> : public Operation {
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
class ActivationOp<DeviceType::GPU, T> : public Operation { class MyCustomOp<DeviceType::GPU, T> : public Operation {
... ...
}; };
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} // namespace ops
} // namespace mace
```
Register the Operation
-----------------------
1. Add the register function in `mace/kernels/my_custom_op.cc`
```c++
#include "mace/core/operator.h"
namespace mace {
namespace kernels {
void RegisterMyCustomOp(OpRegistryBase *op_registry) { void RegisterMyCustomOp(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
DeviceType::CPU, float); DeviceType::CPU, float);
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
DeviceType::GPU, float); DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "MyCustomOp", ActivationOp, MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
DeviceType::GPU, half); DeviceType::GPU, half);
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
``` ```
2. Then register the new Op in `mace/kernels/ops_register.cc`.
Register the Operation
-----------------------
Register the new Op in `mace/ops/ops_register.cc`.
``` ```
#include "mace/kernels/ops_register.h" #include "mace/ops/ops_register.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
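// The rest of this file is collapsed in the diff; as a hedged sketch (mirroring
// the example in the "Reduce Library Size" section of the advanced usage doc),
// the new op's registration function is declared here and then called when the
// registry is constructed:
extern void RegisterMyCustomOp(OpRegistryBase *op_registry);
...
}  // namespace ops

OpRegistry::OpRegistry() : OpRegistryBase() {
  ...
  ops::RegisterMyCustomOp(this);
  ...
}
}  // namespace mace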
......
...@@ -112,7 +112,8 @@ There are two common advanced use cases: ...@@ -112,7 +112,8 @@ There are two common advanced use cases:
- converting model to C++ code. - converting model to C++ code.
- tuning GPU kernels for a specific SoC. - tuning GPU kernels for a specific SoC.
* **Convert model(s) to C++ code** Convert model(s) to C++ code
--------------------------------
* **1. Change the model deployment file(.yml)** * **1. Change the model deployment file(.yml)**
...@@ -204,7 +205,8 @@ There are two common advanced use cases: ...@@ -204,7 +205,8 @@ There are two common advanced use cases:
// ... Same with the code in basic usage // ... Same with the code in basic usage
* **Tuning for specific SoC's GPU** Tuning for specific SoC's GPU
---------------------------------
If you want to use the GPU of a specific device, you can just specify the ``target_socs`` in your YAML file and If you want to use the GPU of a specific device, you can just specify the ``target_socs`` in your YAML file and
then tune the MACE lib (OpenCL kernels) for it, which may yield a 1~10% performance improvement. then tune the MACE lib (OpenCL kernels) for it, which may yield a 1~10% performance improvement.
...@@ -375,25 +377,52 @@ Use ``-h`` to get detailed help. ...@@ -375,25 +377,52 @@ Use ``-h`` to get detailed help.
Reduce Library Size Reduce Library Size
------------------- -------------------
* **dynamic library** * Build for your own usage.
* **dynamic library**
The dynamic library generated by the script ``tools/build-standalone-lib.sh`` is about ``1.6M`` for - If the models don't need to run on the ``dsp`` device, change the build option ``--define hexagon=true``
``armeabi-v7a`` and ``2.1M`` for ``arm64-v8a``. It can be reduced by modifying some build options. to ``false``, and the library size will decrease by about ``100KB``.
- If the models don't need to run on the ``dsp`` device, change the build option ``--define hexagon=true`` - Furthermore, if only the ``cpu`` device is needed, change ``--define opencl=true`` to ``false``. This
to ``false``, and the library size will decrease by about ``100KB``. will cut the library size roughly in half, to about ``700KB`` for ``armeabi-v7a`` and ``1000KB`` for ``arm64-v8a``.
- Furthermore, if only the ``cpu`` device is needed, change ``--define opencl=true`` to ``false``. This - About ``300KB`` can be saved by adding the ``--config symbol_hidden`` build option. It will change
will cut the library size roughly in half, to about ``700KB`` for ``armeabi-v7a`` and ``1000KB`` for ``arm64-v8a``. the visibility of internal APIs in libmace.so and lead to linking errors when loading model(s) in ``code``
mode, but has no effect in ``file`` mode.
- About ``300KB`` can be saved by adding the ``--config symbol_hidden`` build option. It will change * **static library**
the visibility of internal APIs in libmace.so and lead to linking errors when loading model(s) in ``code``
mode, but has no effect in ``file`` mode.
* **static library** - The methods for the dynamic library are useful for the static library too. In addition, the static
library may also contain the model graph and model data if the ``model_graph_format`` and
``model_data_format`` configs in the deployment file are set to ``code``.
- The methods for the dynamic library are useful for the static library too. In addition, the static - It is recommended to use the ``version script`` and ``strip`` features when linking the MACE static library. The effect is remarkable.
library may also contain the model graph and model data if the ``model_graph_format`` and
``model_data_format`` configs in the deployment file are set to ``code``.
- It is recommended to use the ``version script`` and ``strip`` features when linking the MACE static library. The effect is remarkable. * Remove the unused ops.
Remove the registration of the ops unused by your models in ``mace/ops/ops_register.cc``,
which will reduce the library size significantly; the final binary only links the registered ops' code.
```
#include "mace/ops/ops_register.h"
namespace mace {
namespace ops {
// Just leave the ops used in your models
...
} // namespace ops
OpRegistry::OpRegistry() : OpRegistryBase() {
// Just leave the ops used in your models
...
ops::RegisterMyCustomOp(this);
...
}
} // namespace mace
```
...@@ -23,9 +23,10 @@ ...@@ -23,9 +23,10 @@
#include <cstring> #include <cstring>
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/core/registry.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/core/runtime_failure_mock.h" #include "mace/core/runtime_failure_mock.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
namespace mace { namespace mace {
......
...@@ -27,8 +27,7 @@ ...@@ -27,8 +27,7 @@
namespace mace { namespace mace {
SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, SerialNet::SerialNet(const OpRegistryBase *op_registry,
const OpRegistryBase *op_registry,
const NetDef *net_def, const NetDef *net_def,
Workspace *ws, Workspace *ws,
Device *target_device, Device *target_device,
...@@ -41,15 +40,7 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, ...@@ -41,15 +40,7 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry,
target_device->cpu_runtime()->policy(), target_device->cpu_runtime()->policy(),
target_device->cpu_runtime()->use_gemmlowp())) { target_device->cpu_runtime()->use_gemmlowp())) {
MACE_LATENCY_LOGGER(1, "Constructing SerialNet"); MACE_LATENCY_LOGGER(1, "Constructing SerialNet");
// Register Operations
MaceStatus status;
for (int idx = 0; idx < net_def->op_types_size(); ++idx) {
status = op_def_registry->Register(net_def->op_types(idx));
MACE_CHECK(status == MaceStatus::MACE_SUCCESS, status.information());
}
// Create Operations // Create Operations
operators_.clear();
const OpRegistrationInfo *info;
DeviceType target_device_type = target_device_->device_type(); DeviceType target_device_type = target_device_->device_type();
OpConstructContext construct_context(ws_); OpConstructContext construct_context(ws_);
for (int idx = 0; idx < net_def->op_size(); ++idx) { for (int idx = 0; idx < net_def->op_size(); ++idx) {
...@@ -59,16 +50,13 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry, ...@@ -59,16 +50,13 @@ SerialNet::SerialNet(OpDefRegistryBase *op_def_registry,
ProtoArgHelper::GetOptionalArg<OperatorDef, int>( ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
operator_def, "device", static_cast<int>(target_device_type)); operator_def, "device", static_cast<int>(target_device_type));
if (op_device == target_device_type) { if (op_device == target_device_type) {
// Find op registration information
status = op_def_registry->Find(operator_def.type(), &info);
MACE_CHECK(status == MaceStatus::MACE_SUCCESS, status.information());
// Get available devices (sorted based on priority) // Get available devices (sorted based on priority)
OperatorDef temp_def(operator_def); OperatorDef temp_def(operator_def);
auto available_devices = info->device_place_func_(); auto available_devices = op_registry->AvailableDevices(temp_def.type());
// Find the device type to run the op. // Find the device type to run the op.
// If the target_device_type in available devices, use target_device_type, // If the target_device_type in available devices, use target_device_type,
// otherwise, fall back to the first device (top priority). // otherwise, fall back to the CPU device.
DeviceType device_type = available_devices[0]; DeviceType device_type = DeviceType::CPU;
construct_context.set_device(cpu_device_); construct_context.set_device(cpu_device_);
for (auto device : available_devices) { for (auto device : available_devices) {
if (device == target_device_type) { if (device == target_device_type) {
......
...@@ -21,8 +21,6 @@ ...@@ -21,8 +21,6 @@
#include <unordered_map> #include <unordered_map>
#include <sstream> #include <sstream>
#include "mace/core/op_def_registry.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
...@@ -45,8 +43,7 @@ class NetBase { ...@@ -45,8 +43,7 @@ class NetBase {
class SerialNet : public NetBase { class SerialNet : public NetBase {
public: public:
SerialNet(OpDefRegistryBase *op_def_registry, SerialNet(const OpRegistryBase *op_registry,
const OpRegistryBase *op_registry,
const NetDef *net_def, const NetDef *net_def,
Workspace *ws, Workspace *ws,
Device *target_device, Device *target_device,
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/utils/logging.h"
namespace mace {
void AddOpRegistrar(OpDefRegistryBase *registry,
const OpRegistrationBuilder &builder) {
registry->AddRegistrar(
builder.name(),
[builder](OpRegistrationInfo *info){
builder.Finalize(info);
});
}
OpRegistrationBuilder::OpRegistrationBuilder(const std::string name)
: name_(name) {}
const std::string OpRegistrationBuilder::name() const { return name_; }
OpRegistrationBuilder &OpRegistrationBuilder::SetDevicePlaceFunc(
std::vector<DeviceType> (*func)()) {
info_.device_place_func_ = func;
return *this;
}
void OpRegistrationBuilder::Finalize(OpRegistrationInfo *info) const {
*info = info_;
}
void OpDefRegistryBase::AddRegistrar(const std::string name,
const OpRegistrar &registrar) {
registrar_.emplace(name, registrar);
}
MaceStatus OpDefRegistryBase::Register(const std::string &name) {
VLOG(3) << "Registering operation definition: " << name;
if (registry_.find(name) != registry_.end()) {
return MaceStatus::MACE_SUCCESS;
}
auto iter = registrar_.find(name);
if (iter == registrar_.end()) {
return MaceStatus(MaceStatus::MACE_INVALID_ARGS,
"MACE do not support the operation: " + name);
}
registry_.emplace(
name, std::unique_ptr<OpRegistrationInfo>(new OpRegistrationInfo()));
iter->second(registry_[name].get());
return MaceStatus::MACE_SUCCESS;
}
MaceStatus OpDefRegistryBase::Find(const std::string &name,
const OpRegistrationInfo **info) {
auto iter = registry_.find(name);
if (iter == registry_.end()) {
*info = nullptr;
return MaceStatus(MaceStatus::MACE_INVALID_ARGS,
"Mace do not support the operation: " + name);
}
*info = iter->second.get();
return MaceStatus::MACE_SUCCESS;
}
} // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_OP_DEF_REGISTRY_H_
#define MACE_CORE_OP_DEF_REGISTRY_H_
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
// Device placement function
typedef std::function<std::vector<DeviceType>()> DevicePlaceFunc;
struct OpRegistrationInfo {
OpRegistrationInfo() = default;
explicit OpRegistrationInfo(const DevicePlaceFunc &func)
: device_place_func_(func) {}
DevicePlaceFunc device_place_func_;
};
class OpRegistrationBuilder {
public:
explicit OpRegistrationBuilder(const std::string name);
const std::string name() const;
OpRegistrationBuilder &SetDevicePlaceFunc(
std::vector<DeviceType> (*func)());
void Finalize(OpRegistrationInfo *info) const;
private:
std::string name_;
OpRegistrationInfo info_;
};
class OpDefRegistryBase {
public:
typedef std::function<void(OpRegistrationInfo *)> OpRegistrar;
OpDefRegistryBase() = default;
virtual ~OpDefRegistryBase() = default;
void AddRegistrar(const std::string name, const OpRegistrar &registrar);
MaceStatus Register(const std::string &name);
MaceStatus Find(const std::string &name, const OpRegistrationInfo **info);
private:
std::unordered_map<std::string, OpRegistrar> registrar_;
std::unordered_map<
std::string,
std::unique_ptr<OpRegistrationInfo>> registry_;
MACE_DISABLE_COPY_AND_ASSIGN(OpDefRegistryBase);
};
void AddOpRegistrar(OpDefRegistryBase *registry,
const OpRegistrationBuilder &builder);
#define MACE_REGISTER_OP_DEF(op_def_registry, builder) \
AddOpRegistrar(op_def_registry, builder)
} // namespace mace
#endif // MACE_CORE_OP_DEF_REGISTRY_H_
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <sstream> #include <sstream>
#include <map>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -79,7 +80,26 @@ MaceStatus Operation::Init(OpInitContext *context) { ...@@ -79,7 +80,26 @@ MaceStatus Operation::Init(OpInitContext *context) {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {} // op registry
namespace {
class OpKeyBuilder {
public:
explicit OpKeyBuilder(const std::string &op_name);
OpKeyBuilder &Device(DeviceType device);
OpKeyBuilder &TypeConstraint(const char *attr_name,
DataType allowed);
const std::string Build();
private:
std::string op_name_;
DeviceType device_type_;
std::map<std::string, DataType> type_constraint_;
};
OpKeyBuilder::OpKeyBuilder(const std::string &op_name) : op_name_(op_name) {}
OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) {
device_type_ = device; device_type_ = device;
...@@ -103,16 +123,53 @@ const std::string OpKeyBuilder::Build() { ...@@ -103,16 +123,53 @@ const std::string OpKeyBuilder::Build() {
return ss.str(); return ss.str();
} }
} // namespace
void OpRegistrationInfo::AddDevice(mace::DeviceType device) {
devices.insert(device);
}
void OpRegistrationInfo::Register(const std::string &key, OpCreator creator) {
VLOG(3) << "Registering: " << key;
MACE_CHECK(creators.count(key) == 0, "Key already registered: ", key);
creators[key] = creator;
}
MaceStatus OpRegistryBase::Register(const std::string &op_type,
const mace::DeviceType device_type,
const mace::DataType dt,
mace::OpRegistrationInfo::OpCreator creator) {
if (registry_.count(op_type) == 0) {
registry_[op_type] = std::unique_ptr<OpRegistrationInfo>(
new OpRegistrationInfo);
}
registry_[op_type]->AddDevice(device_type);
std::string op_key = OpKeyBuilder(op_type)
.Device(device_type)
.TypeConstraint("T", dt)
.Build();
registry_.at(op_type)->Register(op_key, creator);
return MaceStatus::MACE_SUCCESS;
}
const std::set<DeviceType> OpRegistryBase::AvailableDevices(
const std::string &op_type) const {
MACE_CHECK(registry_.count(op_type) != 0,
op_type, " operation is not registered.");
return registry_.at(op_type)->devices;
}
OpRegistryBase::~OpRegistryBase() = default;
std::unique_ptr<Operation> OpRegistryBase::CreateOperation( std::unique_ptr<Operation> OpRegistryBase::CreateOperation(
OpConstructContext *context, OpConstructContext *context,
DeviceType device_type, DeviceType device_type,
const NetMode mode) const { const NetMode mode) const {
OperatorDef *operator_def = context->operator_def(); OperatorDef *operator_def = context->operator_def();
const int dtype = ProtoArgHelper::GetOptionalArg<OperatorDef, int>( const DataType dtype = static_cast<DataType>(
*operator_def, "T", static_cast<int>(DT_FLOAT)); ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def, "T", static_cast<int>(DT_FLOAT)));
const int op_mode_i = ProtoArgHelper::GetOptionalArg<OperatorDef, int>( const int op_mode_i = ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
*operator_def, "mode", static_cast<int>(NetMode::NORMAL)); *operator_def, "mode", static_cast<int>(NetMode::NORMAL));
const NetMode op_mode = static_cast<NetMode>(op_mode_i); const NetMode op_mode = static_cast<NetMode>(op_mode_i);
...@@ -120,15 +177,20 @@ std::unique_ptr<Operation> OpRegistryBase::CreateOperation( ...@@ -120,15 +177,20 @@ std::unique_ptr<Operation> OpRegistryBase::CreateOperation(
<< operator_def->type() << "<" << dtype << ">" << ") on " << operator_def->type() << "<" << dtype << ">" << ") on "
<< device_type; << device_type;
if (op_mode == mode) { if (op_mode == mode) {
return registry_.Create( const std::string op_type = context->operator_def()->type();
OpKeyBuilder(operator_def->type().data()) MACE_CHECK(registry_.count(op_type) != 0,
.Device(device_type) op_type, " operation is not registered.");
.TypeConstraint("T", static_cast<DataType>(dtype))
.Build(), std::string key = OpKeyBuilder(op_type)
context); .Device(device_type)
.TypeConstraint("T", dtype)
.Build();
if (registry_.at(op_type)->creators.count(key) == 0) {
LOG(FATAL) << "Key not registered: " << key;
}
return registry_.at(op_type)->creators.at(key)(context);
} else { } else {
return nullptr; return nullptr;
} }
} }
} // namespace mace } // namespace mace
...@@ -16,13 +16,13 @@ ...@@ -16,13 +16,13 @@
#define MACE_CORE_OPERATOR_H_ #define MACE_CORE_OPERATOR_H_
#include <memory> #include <memory>
#include <set>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include <map>
#include "mace/core/arg_helper.h" #include "mace/core/arg_helper.h"
#include "mace/core/op_context.h" #include "mace/core/op_context.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h" #include "mace/proto/mace.pb.h"
...@@ -160,62 +160,57 @@ class Operation { ...@@ -160,62 +160,57 @@ class Operation {
#define MACE_OP_OUTPUT_TAGS(first_input, ...) \ #define MACE_OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ } enum _OutputTags { first_input = 0, __VA_ARGS__ }
class OpKeyBuilder {
public:
explicit OpKeyBuilder(const char *op_name);
OpKeyBuilder &Device(DeviceType device); struct OpRegistrationInfo {
public:
typedef std::function<std::unique_ptr<Operation>(OpConstructContext *)>
OpCreator;
OpKeyBuilder &TypeConstraint(const char *attr_name, OpRegistrationInfo() = default;
DataType allowed);
template <typename T> void AddDevice(DeviceType);
OpKeyBuilder &TypeConstraint(const char *attr_name);
const std::string Build(); void Register(const std::string &key, OpCreator creator);
private: std::set<DeviceType> devices;
std::string op_name_; std::unordered_map<std::string, OpCreator> creators;
DeviceType device_type_;
std::map<std::string, DataType> type_constraint_;
}; };
template <typename T>
OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
return this->TypeConstraint(attr_name, DataTypeToEnum<T>::value);
}
class OpRegistryBase { class OpRegistryBase {
public: public:
typedef Registry<std::string,
Operation,
OpConstructContext *>
RegistryType;
OpRegistryBase() = default; OpRegistryBase() = default;
virtual ~OpRegistryBase(); virtual ~OpRegistryBase() = default;
RegistryType *registry() { return &registry_; } MaceStatus Register(const std::string &op_type,
const DeviceType device_type,
const DataType dt,
OpRegistrationInfo::OpCreator creator);
const std::set<DeviceType> AvailableDevices(
const std::string &op_type) const;
std::unique_ptr<Operation> CreateOperation( std::unique_ptr<Operation> CreateOperation(
OpConstructContext *context, OpConstructContext *context,
DeviceType device_type, DeviceType device_type,
const NetMode mode) const; const NetMode mode) const;
template <class DerivedType>
static std::unique_ptr<Operation> DefaultCreator(
OpConstructContext *context) {
return std::unique_ptr<Operation>(new DerivedType(context));
}
private: private:
RegistryType registry_; std::unordered_map<
std::string,
std::unique_ptr<OpRegistrationInfo>> registry_;
MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase); MACE_DISABLE_COPY_AND_ASSIGN(OpRegistryBase);
}; };
MACE_DECLARE_REGISTRY(OpRegistry,
Operation,
OpConstructContext *);
#define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \ #define MACE_REGISTER_OP(op_registry, op_type, class_name, device, dt) \
MACE_REGISTER_CLASS(OpRegistry, \ op_registry->Register(op_type, \
op_registry->registry(), \ device, \
OpKeyBuilder(op_type) \ DataTypeToEnum<dt>::value, \
.Device(device) \ OpRegistryBase::DefaultCreator<class_name<device, dt>>)
.TypeConstraint<dt>("T") \
.Build(), \
class_name<device, dt>)
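// Illustrative note (not part of the original header): with the new macro,
//   MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp, DeviceType::CPU, float)
// expands to
//   op_registry->Register("MyCustomOp", DeviceType::CPU,
//                         DataTypeToEnum<float>::value,
//                         OpRegistryBase::DefaultCreator<MyCustomOp<DeviceType::CPU, float>>);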
} // namespace mace } // namespace mace
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_REGISTRY_H_
#define MACE_CORE_REGISTRY_H_
#include <functional>
#include <map>
#include <memory>
#include <mutex> // NOLINT(build/c++11)
#include <string>
#include <vector>
#include "mace/utils/logging.h"
namespace mace {
template <class SrcType, class ObjectType, class... Args>
class Registry {
public:
typedef std::function<std::unique_ptr<ObjectType>(Args...)> Creator;
Registry() : registry_() {}
void Register(const SrcType &key, Creator creator) {
VLOG(3) << "Registering: " << key;
std::lock_guard<std::mutex> lock(register_mutex_);
MACE_CHECK(registry_.count(key) == 0, "Key already registered: ", key);
registry_[key] = creator;
}
std::unique_ptr<ObjectType> Create(const SrcType &key, Args... args) const {
if (registry_.count(key) == 0) {
LOG(FATAL) << "Key not registered: " << key;
}
return registry_.at(key)(args...);
}
private:
std::map<SrcType, Creator> registry_;
std::mutex register_mutex_;
MACE_DISABLE_COPY_AND_ASSIGN(Registry);
};
template <class SrcType, class ObjectType, class... Args>
class Registerer {
public:
Registerer(const SrcType &key,
Registry<SrcType, ObjectType, Args...> *registry,
typename Registry<SrcType, ObjectType, Args...>::Creator creator) {
registry->Register(key, creator);
}
template <class DerivedType>
static std::unique_ptr<ObjectType> DefaultCreator(Args... args) {
return std::unique_ptr<ObjectType>(new DerivedType(args...));
}
};
#define MACE_CONCATENATE_IMPL(s1, s2) s1##s2
#define MACE_CONCATENATE(s1, s2) MACE_CONCATENATE_IMPL(s1, s2)
#ifdef __COUNTER__
#define MACE_ANONYMOUS_VARIABLE(str) MACE_CONCATENATE(str, __COUNTER__)
#else
#define MACE_ANONYMOUS_VARIABLE(str) MACE_CONCATENATE(str, __LINE__)
#endif
#define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \
Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(RegistryName)( \
key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
#define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__)
} // namespace mace
#endif // MACE_CORE_REGISTRY_H_
...@@ -15,10 +15,11 @@ ...@@ -15,10 +15,11 @@
#ifndef MACE_CORE_TENSOR_H_ #ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_
#include <algorithm>
#include <functional>
#include <numeric>
#include <string> #include <string>
#include <vector> #include <vector>
#include <functional>
#include <algorithm>
#include "mace/core/buffer.h" #include "mace/core/buffer.h"
#include "mace/core/preallocated_pooled_allocator.h" #include "mace/core/preallocated_pooled_allocator.h"
......
# Description:
# Mace neon kernels.
#
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
load(
"//mace:mace.bzl",
"if_android",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7",
"if_hexagon_enabled",
"if_opencl_enabled",
)
cc_library(
name = "kernels",
srcs = glob(
[
"*.cc",
"arm/*.cc",
],
exclude = [
"*_test.cc",
"*_benchmark.cc",
"arm/*_test.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"lstm_cell.cc",
"winograd_transform.cc",
],
) + if_opencl_enabled(glob(
[
"opencl/*.cc",
"opencl/image/*.cc",
"opencl/buffer/*.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"lstm_cell.cc",
"winograd_transform.cc",
],
exclude = [
"opencl/*_test.cc",
],
)),
hdrs = glob(
[
"*.h",
"arm/*.h",
],
) + if_opencl_enabled(glob([
"opencl/*.h",
"opencl/image/*.h",
"opencl/buffer/*.h",
])),
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
]) + if_android_armv7([
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = if_android(["-lm"]),
deps = [
"//mace/core",
"@gemmlowp",
"@tflite",
],
)
cc_test(
name = "kernels_test",
testonly = 1,
srcs = glob(
[
"*_test.cc",
"arm/*_test.cc",
"opencl/*_test.cc",
],
),
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = ["-fopenmp"],
linkstatic = 1,
deps = [
":kernels",
"//mace/ops",
"@gtest",
"@gtest//:gtest_main",
],
)
cc_test(
name = "kernels_benchmark",
testonly = 1,
srcs = glob(["*_benchmark.cc"]),
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = ["-fopenmp"],
linkstatic = 1,
deps = [
":kernels",
"//mace/core:test_benchmark_main",
"//mace/ops",
"//third_party/eigen3",
"@gemmlowp",
],
)
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Dense>
#include <algorithm>
#include <string>
#include <tuple>
#include <vector>
#include "public/gemmlowp.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/gemm.h"
#include "mace/kernels/sgemm.h"
#include "mace/ops/ops_test_util.h"
namespace gemmlowp {
template<typename tScalar, MapOrder tOrder>
class Matrix : public MatrixMap<tScalar, tOrder> {
public:
typedef MatrixMap<tScalar, tOrder> Map;
typedef MatrixMap<const tScalar, tOrder> ConstMap;
typedef typename Map::Scalar Scalar;
static const MapOrder Order = tOrder;
using Map::cols_;
using Map::data_;
using Map::kOrder;
using Map::rows_;
using Map::stride_;
public:
Matrix() : Map(nullptr, 0, 0, 0) {}
Matrix(int rows, int cols) : Map(nullptr, 0, 0, 0) { Resize(rows, cols); }
Matrix(const Matrix &other) : Map(nullptr, 0, 0, 0) { *this = other; }
Matrix &operator=(const Matrix &other) {
Resize(other.rows_, other.cols_);
std::memcpy(data_, other.data_, size() * sizeof(Scalar));
return *this;
}
friend bool operator==(const Matrix &a, const Matrix &b) {
return a.rows_ == b.rows_ && a.cols_ == b.cols_ &&
!std::memcmp(a.data_, b.data_, a.size());
}
void Resize(int rows, int cols) {
rows_ = rows;
cols_ = cols;
stride_ = kOrder == gemmlowp::MapOrder::ColMajor ? rows : cols;
storage.resize(size());
data_ = storage.data();
}
int size() const { return rows_ * cols_; }
Map &map() { return *static_cast<Map *>(this); }
ConstMap const_map() const { return ConstMap(data_, rows_, cols_, stride_); }
protected:
std::vector<Scalar> storage;
};
template<typename MatrixType>
void MakeZero(MatrixType *m) {
for (int c = 0; c < m->cols(); c++) {
for (int r = 0; r < m->rows(); r++) {
(*m)(r, c) = 128;
}
}
}
} // namespace gemmlowp
namespace mace {
namespace kernels {
namespace test {
// Test the speed of different access order of a NHWC buffer
namespace {
// Matmul with (m, k) x (k, n)
void MatmulBenchmark_Mace(int iters, int m, int k, int n) {
mace::testing::StopTiming();
std::vector<float> lhs(m * k);
std::vector<float> rhs(k * n);
std::vector<float> result(m * n);
// warm up
Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data());
mace::testing::StartTiming();
while (iters--) {
Gemm(lhs.data(), rhs.data(), 1, m, k, n, result.data());
}
}
void MatmulBenchmark_Mace_SGemm(int iters, int m, int k, int n) {
mace::testing::StopTiming();
std::vector<float> lhs(m * k);
std::vector<float> rhs(k * n);
std::vector<float> result(m * n);
kernels::MatrixMap<const float> matrix_lhs(1, m, k, RowMajor, lhs.data(),
true);
kernels::MatrixMap<const float> matrix_rhs(1, k, n, RowMajor, rhs.data(),
true);
kernels::MatrixMap<float> matrix_result(1, m, n, RowMajor, result.data());
kernels::SGemm sgemm;
sgemm(matrix_lhs, matrix_rhs, &matrix_result);
mace::testing::StartTiming();
while (iters--) {
sgemm(matrix_lhs, matrix_rhs, &matrix_result);
}
}
void MatmulBenchmark_Eigen(int iters, int m, int k, int n) {
mace::testing::StopTiming();
Eigen::MatrixXf lhs = Eigen::MatrixXf::Random(m, k);
Eigen::MatrixXf rhs = Eigen::MatrixXf::Random(k, n);
Eigen::MatrixXf result = Eigen::MatrixXf::Zero(m, n);
// warm up
result = lhs * rhs;
mace::testing::StartTiming();
while (iters--) {
result = lhs * rhs;
}
}
void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) {
mace::testing::StopTiming();
gemmlowp::Matrix<std::uint8_t, gemmlowp::MapOrder::RowMajor> lhs;
gemmlowp::Matrix<std::uint8_t, gemmlowp::MapOrder::ColMajor> rhs;
gemmlowp::Matrix<std::uint8_t, gemmlowp::MapOrder::ColMajor> result;
lhs.Resize(rows, depth);
rhs.Resize(depth, cols);
result.Resize(rows, cols);
gemmlowp::MakeZero(&lhs);
gemmlowp::MakeZero(&rhs);
gemmlowp::MakeZero(&result);
gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint quantize_down_stage;
quantize_down_stage.result_offset_after_shift = 128;
quantize_down_stage.result_fixedpoint_multiplier = 1234567890;
quantize_down_stage.result_shift = 16;
gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
const auto output_pipeline =
std::make_tuple(quantize_down_stage, saturating_cast_stage);
auto gemm_context =
mace::ops::test::OpTestContext::Get()
->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext();
MACE_CHECK_NOTNULL(gemm_context);
using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams;
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t, BitDepthParams>(
gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128,
-128, output_pipeline);
mace::testing::StartTiming();
while (iters--) {
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
BitDepthParams>(
gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128,
-128, output_pipeline);
}
}
void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
mace::testing::StopTiming();
gemmlowp::Matrix<std::uint8_t, gemmlowp::MapOrder::RowMajor> lhs;
gemmlowp::Matrix<std::uint8_t, gemmlowp::MapOrder::ColMajor> rhs;
gemmlowp::Matrix<std::int32_t, gemmlowp::MapOrder::ColMajor> result;
lhs.Resize(rows, depth);
rhs.Resize(depth, cols);
result.Resize(rows, cols);
gemmlowp::MakeZero(&lhs);
gemmlowp::MakeZero(&rhs);
gemmlowp::MakeZero(&result);
const auto output_pipeline = std::make_tuple();
auto gemm_context =
mace::ops::test::OpTestContext::Get()
->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext();
MACE_CHECK_NOTNULL(gemm_context);
using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams;
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t, BitDepthParams>(
gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128,
-128, output_pipeline);
mace::testing::StartTiming();
while (iters--) {
gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::int32_t,
BitDepthParams>(
gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128,
-128, output_pipeline);
}
}
} // namespace
#define MACE_BM_MATMUL_FUNC(M, K, N, FUNC, TYPE) \
static void MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC(int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * M * K * N; \
const int64_t tot = static_cast<int64_t>(iters) * (M + N) * K; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot * sizeof(TYPE)); \
MatmulBenchmark_##FUNC(iters, M, K, N); \
} \
MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC)
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Mace, float); \
MACE_BM_MATMUL_FUNC(M, K, N, Mace_SGemm, float); \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t);
// Embedding size 384
MACE_BM_MATMUL(7, 384, 384);
MACE_BM_MATMUL(7, 384, 1536);
MACE_BM_MATMUL(7, 1536, 384);
MACE_BM_MATMUL(15, 384, 384);
MACE_BM_MATMUL(15, 384, 1536);
MACE_BM_MATMUL(15, 1536, 384);
MACE_BM_MATMUL(1, 256, 256);
MACE_BM_MATMUL(1, 256, 1536);
MACE_BM_MATMUL(1, 1536, 256);
MACE_BM_MATMUL(256, 256, 1);
MACE_BM_MATMUL(1536, 256, 1);
MACE_BM_MATMUL(256, 1536, 1);
MACE_BM_MATMUL(29792, 256, 1);
MACE_BM_MATMUL(1, 256, 29792);
MACE_BM_MATMUL(2, 256, 256);
MACE_BM_MATMUL(2, 256, 1536);
MACE_BM_MATMUL(2, 1536, 256);
MACE_BM_MATMUL(3, 256, 256);
MACE_BM_MATMUL(3, 256, 1536);
MACE_BM_MATMUL(3, 1536, 256);
MACE_BM_MATMUL(4, 256, 256);
MACE_BM_MATMUL(4, 256, 1536);
MACE_BM_MATMUL(4, 1536, 256);
MACE_BM_MATMUL(8, 256, 256);
MACE_BM_MATMUL(8, 256, 1536);
MACE_BM_MATMUL(8, 1536, 256);
MACE_BM_MATMUL(10, 256, 256);
MACE_BM_MATMUL(10, 256, 1536);
MACE_BM_MATMUL(10, 1536, 256);
MACE_BM_MATMUL(15, 256, 256);
MACE_BM_MATMUL(15, 256, 1536);
MACE_BM_MATMUL(15, 1536, 256);
// Embedding size 128
MACE_BM_MATMUL(1, 128, 1536);
MACE_BM_MATMUL(1, 128, 44678);
// MobileNet
MACE_BM_MATMUL(128, 128, 3136);
MACE_BM_MATMUL(256, 256, 784);
MACE_BM_MATMUL(512, 512, 196);
MACE_BM_MATMUL(1024, 1024, 49);
} // namespace test
} // namespace kernels
} // namespace mace
...@@ -40,7 +40,6 @@ cc_library( ...@@ -40,7 +40,6 @@ cc_library(
deps = [ deps = [
"//mace/public", "//mace/public",
"//mace/ops", "//mace/ops",
"//mace/kernels",
], ],
alwayslink = 1, alwayslink = 1,
) )
...@@ -79,7 +78,7 @@ genrule( ...@@ -79,7 +78,7 @@ genrule(
srcs = [ srcs = [
"//mace/codegen:generated_version", "//mace/codegen:generated_version",
"//mace/core", "//mace/core",
"//mace/kernels", "//mace/ops:internal_ops",
"//mace/ops", "//mace/ops",
"//mace/libmace", "//mace/libmace",
"//mace/utils", "//mace/utils",
...@@ -93,7 +92,7 @@ genrule( ...@@ -93,7 +92,7 @@ genrule(
"mri_stream=$$(python $(location //mace/python/tools:archive_static_lib) " + "mri_stream=$$(python $(location //mace/python/tools:archive_static_lib) " +
"$(locations //mace/codegen:generated_version) " + "$(locations //mace/codegen:generated_version) " +
"$(locations //mace/core:core) " + "$(locations //mace/core:core) " +
"$(locations //mace/kernels:kernels) " + "$(locations //mace/ops:internal_ops) " +
"$(locations //mace/ops:ops) " + "$(locations //mace/ops:ops) " +
"$(locations //mace/libmace:libmace) " + "$(locations //mace/libmace:libmace) " +
"$(locations //mace/utils:utils) " + "$(locations //mace/utils:utils) " +
......
...@@ -22,8 +22,7 @@ ...@@ -22,8 +22,7 @@
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/device_context.h" #include "mace/core/device_context.h"
#include "mace/kernels/ops_register.h" #include "mace/ops/ops_registry.h"
#include "mace/ops/ops_def_register.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
...@@ -359,7 +358,6 @@ class MaceEngine::Impl { ...@@ -359,7 +358,6 @@ class MaceEngine::Impl {
private: private:
const unsigned char *model_data_; const unsigned char *model_data_;
size_t model_data_size_; size_t model_data_size_;
std::unique_ptr<OpDefRegistryBase> op_def_registry_;
std::unique_ptr<OpRegistryBase> op_registry_; std::unique_ptr<OpRegistryBase> op_registry_;
DeviceType device_type_; DeviceType device_type_;
std::unique_ptr<Device> device_; std::unique_ptr<Device> device_;
...@@ -377,7 +375,6 @@ class MaceEngine::Impl { ...@@ -377,7 +375,6 @@ class MaceEngine::Impl {
MaceEngine::Impl::Impl(const MaceEngineConfig &config) MaceEngine::Impl::Impl(const MaceEngineConfig &config)
: model_data_(nullptr), : model_data_(nullptr),
model_data_size_(0), model_data_size_(0),
op_def_registry_(new OpDefRegistry()),
op_registry_(new OpRegistry), op_registry_(new OpRegistry),
device_type_(config.impl_->device_type()), device_type_(config.impl_->device_type()),
device_(nullptr), device_(nullptr),
...@@ -466,7 +463,6 @@ MaceStatus MaceEngine::Impl::Init( ...@@ -466,7 +463,6 @@ MaceStatus MaceEngine::Impl::Init(
// Init model // Init model
auto net = std::unique_ptr<NetBase>(new SerialNet( auto net = std::unique_ptr<NetBase>(new SerialNet(
op_def_registry_.get(),
op_registry_.get(), op_registry_.get(),
net_def, net_def,
ws_.get(), ws_.get(),
...@@ -474,8 +470,7 @@ MaceStatus MaceEngine::Impl::Init( ...@@ -474,8 +470,7 @@ MaceStatus MaceEngine::Impl::Init(
NetMode::INIT)); NetMode::INIT));
MACE_RETURN_IF_ERROR(net->Init()); MACE_RETURN_IF_ERROR(net->Init());
MACE_RETURN_IF_ERROR(net->Run()); MACE_RETURN_IF_ERROR(net->Run());
net_ = std::unique_ptr<NetBase>(new SerialNet(op_def_registry_.get(), net_ = std::unique_ptr<NetBase>(new SerialNet(op_registry_.get(),
op_registry_.get(),
net_def, net_def,
ws_.get(), ws_.get(),
device_.get())); device_.get()));
......
# Description: # Description:
# Mace operators.
# #
package( package(
default_visibility = ["//visibility:public"], default_visibility = ["//visibility:public"],
...@@ -18,18 +17,58 @@ load( ...@@ -18,18 +17,58 @@ load(
) )
cc_library( cc_library(
name = "test", name = "internal_ops",
testonly = 1, srcs = glob(
hdrs = glob([ [
"*_test_util.h", "*.cc",
]), "arm/*.cc",
srcs = [ ],
"ops_test_util.cc", exclude = [
], "*_test.cc",
"*_benchmark.cc",
"arm/*_test.cc",
"ops_registry.cc",
"ops_test_util.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"lstm_cell.cc",
"winograd_transform.cc",
],
) + if_opencl_enabled(glob(
[
"opencl/*.cc",
"opencl/image/*.cc",
"opencl/buffer/*.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"lstm_cell.cc",
"winograd_transform.cc",
],
exclude = [
"opencl/*_test.cc",
],
)),
hdrs = glob(
[
"*.h",
"arm/*.h",
],
exclude = [
"ops_registry.h",
"ops_test_util.h",
]
) + if_opencl_enabled(glob([
"opencl/*.h",
"opencl/image/*.h",
"opencl/buffer/*.h",
])),
copts = [ copts = [
"-Werror", "-Werror",
"-Wextra", "-Wextra",
] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ "-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
"-mfpu=neon", "-mfpu=neon",
...@@ -40,20 +79,54 @@ cc_library( ...@@ -40,20 +79,54 @@ cc_library(
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
linkopts = if_android(["-lm"]),
deps = [ deps = [
"ops", "//mace/core",
"//mace/kernels", "@gemmlowp",
"@gtest", "@tflite",
], ],
) )
cc_library( cc_library(
name = "ops", name = "ops",
srcs = [ srcs = [
"ops_def_register.cc", "ops_registry.cc"
], ],
hdrs = [ hdrs = [
"ops_def_register.h", "ops_registry.h",
],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
"-mfpu=neon",
]) + if_android_armv7([
"-mfloat-abi=softfp",
]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = if_android(["-lm"]),
deps = [
"internal_ops",
],
)
cc_library(
name = "test",
testonly = 1,
hdrs = glob([
"*_test_util.h",
]),
srcs = [
"ops_test_util.cc",
], ],
copts = [ copts = [
"-Werror", "-Werror",
...@@ -70,7 +143,8 @@ cc_library( ...@@ -70,7 +143,8 @@ cc_library(
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
deps = [ deps = [
"//mace/core", "ops",
"@gtest",
], ],
) )
...@@ -78,16 +152,22 @@ cc_test( ...@@ -78,16 +152,22 @@ cc_test(
name = "ops_test", name = "ops_test",
testonly = 1, testonly = 1,
srcs = glob( srcs = glob(
["*_test.cc"], [
"*_test.cc",
"arm/*_test.cc",
"opencl/*_test.cc",
],
), ),
copts = [ copts = [
"-Werror", "-Werror",
"-Wextra", "-Wextra",
] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ "-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
"-mfpu=neon", "-mfpu=neon",
]) + if_android_armv7([
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
...@@ -97,8 +177,7 @@ cc_test( ...@@ -97,8 +177,7 @@ cc_test(
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":ops", "test",
":test",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -110,11 +189,13 @@ cc_test( ...@@ -110,11 +189,13 @@ cc_test(
copts = [ copts = [
"-Werror", "-Werror",
"-Wextra", "-Wextra",
] + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ "-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
"-mfpu=neon", "-mfpu=neon",
]) + if_android_armv7([
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
...@@ -124,8 +205,9 @@ cc_test( ...@@ -124,8 +205,9 @@ cc_test(
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":ops", "test",
":test",
"//mace/core:test_benchmark_main", "//mace/core:test_benchmark_main",
"//third_party/eigen3",
"@gemmlowp",
], ],
) )
...@@ -12,18 +12,18 @@ ...@@ -12,18 +12,18 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#include <memory> #include <memory>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/activation.h" #include "mace/ops/opencl/image/activation.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class ActivationOp; class ActivationOp;
...@@ -33,7 +33,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation { ...@@ -33,7 +33,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
public: public:
explicit ActivationOp(OpConstructContext *context) explicit ActivationOp(OpConstructContext *context)
: Operation(context), : Operation(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", relux_max_limit_(Operation::GetOptionalArg<float>("max_limit",
...@@ -74,7 +74,7 @@ class ActivationOp<DeviceType::GPU, T> : public Operation { ...@@ -74,7 +74,7 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
public: public:
explicit ActivationOp(OpConstructContext *context) explicit ActivationOp(OpConstructContext *context)
: Operation(context) { : Operation(context) {
ActivationType type = kernels::StringToActivationType( ActivationType type = ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP")); "NOOP"));
auto relux_max_limit = static_cast<T>( auto relux_max_limit = static_cast<T>(
...@@ -114,5 +114,5 @@ void RegisterActivation(OpRegistryBase *op_registry) { ...@@ -114,5 +114,5 @@ void RegisterActivation(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,19 +12,19 @@ ...@@ -12,19 +12,19 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ACTIVATION_H_ #ifndef MACE_OPS_ACTIVATION_H_
#define MACE_KERNELS_ACTIVATION_H_ #define MACE_OPS_ACTIVATION_H_
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <string> #include <string>
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/kernels/arm/activation_neon.h" #include "mace/ops/arm/activation_neon.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
enum ActivationType { enum ActivationType {
NOOP = 0, NOOP = 0,
...@@ -149,7 +149,7 @@ void PReLUActivation(const T *input_ptr, ...@@ -149,7 +149,7 @@ void PReLUActivation(const T *input_ptr,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ACTIVATION_H_ #endif // MACE_OPS_ACTIVATION_H_
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <string> #include <string>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -46,7 +45,7 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -46,7 +45,7 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluBM") OpDefBuilder("Activation", "ReluBM")
.Input("InputImage") .Input("InputImage")
...@@ -108,7 +107,7 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -108,7 +107,7 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxBM") OpDefBuilder("Activation", "ReluxBM")
.Input("InputImage") .Input("InputImage")
...@@ -186,9 +185,9 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -186,9 +185,9 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Alpha", "AlphaImage", BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Activation", "PreluBM") OpDefBuilder("Activation", "PreluBM")
.Input("InputImage") .Input("InputImage")
...@@ -251,7 +250,7 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -251,7 +250,7 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "TanhBM") OpDefBuilder("Activation", "TanhBM")
.Input("InputImage") .Input("InputImage")
...@@ -318,7 +317,7 @@ void SigmoidBenchmark( ...@@ -318,7 +317,7 @@ void SigmoidBenchmark(
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "SigmoidBM") OpDefBuilder("Activation", "SigmoidBM")
.Input("InputImage") .Input("InputImage")
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -33,7 +32,7 @@ void TestSimpleRelu() { ...@@ -33,7 +32,7 @@ void TestSimpleRelu() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -46,7 +45,7 @@ void TestSimpleRelu() { ...@@ -46,7 +45,7 @@ void TestSimpleRelu() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
.Input("Input") .Input("Input")
...@@ -81,7 +80,7 @@ void TestUnalignedSimpleRelu() { ...@@ -81,7 +80,7 @@ void TestUnalignedSimpleRelu() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -94,7 +93,7 @@ void TestUnalignedSimpleRelu() { ...@@ -94,7 +93,7 @@ void TestUnalignedSimpleRelu() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
.Input("Input") .Input("Input")
...@@ -132,7 +131,7 @@ void TestSimpleRelux() { ...@@ -132,7 +131,7 @@ void TestSimpleRelux() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
.Input("InputImage") .Input("InputImage")
...@@ -146,7 +145,7 @@ void TestSimpleRelux() { ...@@ -146,7 +145,7 @@ void TestSimpleRelux() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
.Input("Input") .Input("Input")
...@@ -182,7 +181,7 @@ void TestSimpleReluRelux() { ...@@ -182,7 +181,7 @@ void TestSimpleReluRelux() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
.Input("InputImage") .Input("InputImage")
...@@ -196,7 +195,7 @@ void TestSimpleReluRelux() { ...@@ -196,7 +195,7 @@ void TestSimpleReluRelux() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
.Input("Input") .Input("Input")
...@@ -237,9 +236,9 @@ void TestSimplePrelu() { ...@@ -237,9 +236,9 @@ void TestSimplePrelu() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Alpha", "AlphaImage", BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Activation", "PreluTest") OpDefBuilder("Activation", "PreluTest")
.Input("InputImage") .Input("InputImage")
...@@ -253,7 +252,7 @@ void TestSimplePrelu() { ...@@ -253,7 +252,7 @@ void TestSimplePrelu() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "PreluTest") OpDefBuilder("Activation", "PreluTest")
.Input("Input") .Input("Input")
...@@ -293,7 +292,7 @@ void TestSimpleTanh() { ...@@ -293,7 +292,7 @@ void TestSimpleTanh() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "TanhTest") OpDefBuilder("Activation", "TanhTest")
.Input("InputImage") .Input("InputImage")
...@@ -306,7 +305,7 @@ void TestSimpleTanh() { ...@@ -306,7 +305,7 @@ void TestSimpleTanh() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "TanhTest") OpDefBuilder("Activation", "TanhTest")
.Input("Input") .Input("Input")
...@@ -346,7 +345,7 @@ void TestSimpleSigmoid() { ...@@ -346,7 +345,7 @@ void TestSimpleSigmoid() {
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "SigmoidTest") OpDefBuilder("Activation", "SigmoidTest")
.Input("InputImage") .Input("InputImage")
...@@ -359,7 +358,7 @@ void TestSimpleSigmoid() { ...@@ -359,7 +358,7 @@ void TestSimpleSigmoid() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "SigmoidTest") OpDefBuilder("Activation", "SigmoidTest")
.Input("Input") .Input("Input")
......
...@@ -22,11 +22,11 @@ ...@@ -22,11 +22,11 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/addn.h" #include "mace/ops/opencl/image/addn.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
static constexpr int kCostPerGroup = 1024; static constexpr int kCostPerGroup = 1024;
...@@ -142,5 +142,5 @@ void RegisterAddN(OpRegistryBase *op_registry) { ...@@ -142,5 +142,5 @@ void RegisterAddN(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <string> #include <string>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -37,7 +36,7 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { ...@@ -37,7 +36,7 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
for (int i = 0; i < inputs; ++i) { for (int i = 0; i < inputs; ++i) {
BufferToImage<D, T>(&net, MakeString("Input", i).c_str(), BufferToImage<D, T>(&net, MakeString("Input", i).c_str(),
MakeString("InputImage", i).c_str(), MakeString("InputImage", i).c_str(),
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
OpDefBuilder op_def_builder("AddN", "AddNBM"); OpDefBuilder op_def_builder("AddN", "AddNBM");
for (int i = 0; i < inputs; ++i) { for (int i = 0; i < inputs; ++i) {
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -69,7 +68,7 @@ void SimpleAdd3() { ...@@ -69,7 +68,7 @@ void SimpleAdd3() {
for (int i = 0; i < input_num; ++i) { for (int i = 0; i < input_num; ++i) {
BufferToImage<D, half>(&net, MakeString("Input", i), BufferToImage<D, half>(&net, MakeString("Input", i),
MakeString("InputImage", i), MakeString("InputImage", i),
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
auto op_def_cl = OpDefBuilder("AddN", "AddNTest"); auto op_def_cl = OpDefBuilder("AddN", "AddNTest");
...@@ -84,7 +83,7 @@ void SimpleAdd3() { ...@@ -84,7 +83,7 @@ void SimpleAdd3() {
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("AddN", "AddNTest") OpDefBuilder("AddN", "AddNTest")
.Input("Input0") .Input("Input0")
...@@ -143,7 +142,7 @@ void RandomTest() { ...@@ -143,7 +142,7 @@ void RandomTest() {
for (int i = 0; i < input_num; ++i) { for (int i = 0; i < input_num; ++i) {
BufferToImage<D, half>(&net, MakeString("Input", i), BufferToImage<D, half>(&net, MakeString("Input", i),
MakeString("InputImage", i), MakeString("InputImage", i),
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
auto op_def_cl = OpDefBuilder("AddN", "AddNTest"); auto op_def_cl = OpDefBuilder("AddN", "AddNTest");
...@@ -158,7 +157,7 @@ void RandomTest() { ...@@ -158,7 +157,7 @@ void RandomTest() {
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2,
1e-2); 1e-2);
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class ArgMaxOp : public Operation { class ArgMaxOp : public Operation {
...@@ -84,5 +84,5 @@ void RegisterArgMax(OpRegistryBase *op_registry) { ...@@ -84,5 +84,5 @@ void RegisterArgMax(OpRegistryBase *op_registry) {
DeviceType::CPU, float); DeviceType::CPU, float);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
......
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
#endif #endif
#include <algorithm> #include <algorithm>
#include "mace/kernels/arm/activation_neon.h" #include "mace/ops/arm/activation_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void ReluNeon(const float *input, const index_t size, float *output) { void ReluNeon(const float *input, const index_t size, float *output) {
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
...@@ -67,5 +67,5 @@ void ReluxNeon(const float *input, const float limit, ...@@ -67,5 +67,5 @@ void ReluxNeon(const float *input, const float limit,
#endif #endif
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,20 +12,20 @@ ...@@ -12,20 +12,20 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ARM_ACTIVATION_NEON_H_ #ifndef MACE_OPS_ARM_ACTIVATION_NEON_H_
#define MACE_KERNELS_ARM_ACTIVATION_NEON_H_ #define MACE_OPS_ARM_ACTIVATION_NEON_H_
#include "mace/core/types.h" #include "mace/core/types.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void ReluNeon(const float *input, const index_t size, float *output); void ReluNeon(const float *input, const index_t size, float *output);
void ReluxNeon(const float *input, const float limit, void ReluxNeon(const float *input, const float limit,
const index_t size, float *output); const index_t size, float *output);
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ARM_ACTIVATION_NEON_H_ #endif // MACE_OPS_ARM_ACTIVATION_NEON_H_
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ARM_CONV_2D_NEON_H_ #ifndef MACE_OPS_ARM_CONV_2D_NEON_H_
#define MACE_KERNELS_ARM_CONV_2D_NEON_H_ #define MACE_OPS_ARM_CONV_2D_NEON_H_
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/kernels/sgemm.h" #include "mace/ops/sgemm.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void Conv2dNeonK1x1S1(const float *input, void Conv2dNeonK1x1S1(const float *input,
const float *filter, const float *filter,
...@@ -115,7 +115,7 @@ inline void Conv2dCPUKHxKWCalc(const float *in_ptr, ...@@ -115,7 +115,7 @@ inline void Conv2dCPUKHxKWCalc(const float *in_ptr,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ARM_CONV_2D_NEON_H_ #endif // MACE_OPS_ARM_CONV_2D_NEON_H_
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
inline void Conv2dCPUK15x1Calc(const float *in_ptr, inline void Conv2dCPUK15x1Calc(const float *in_ptr,
const float *filter_ptr, const float *filter_ptr,
...@@ -157,5 +157,5 @@ void Conv2dNeonK15x1S1(const float *input, ...@@ -157,5 +157,5 @@ void Conv2dNeonK15x1S1(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,10 +12,10 @@ ...@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void Conv2dNeonK1x1S1(const float *input, void Conv2dNeonK1x1S1(const float *input,
const float *filter, const float *filter,
...@@ -44,5 +44,5 @@ void Conv2dNeonK1x1S1(const float *input, ...@@ -44,5 +44,5 @@ void Conv2dNeonK1x1S1(const float *input,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -16,12 +16,12 @@ ...@@ -16,12 +16,12 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
inline void Conv2dCPUK1x15Calc(const float *in_ptr, inline void Conv2dCPUK1x15Calc(const float *in_ptr,
const float *filter_ptr, const float *filter_ptr,
...@@ -143,5 +143,5 @@ void Conv2dNeonK1x15S1(const float *input, ...@@ -143,5 +143,5 @@ void Conv2dNeonK1x15S1(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
// Ho = 1, Wo = 4, Co = 4 // Ho = 1, Wo = 4, Co = 4
void Conv2dNeonK1x7S1(const float *input, void Conv2dNeonK1x7S1(const float *input,
...@@ -247,5 +247,5 @@ void Conv2dNeonK1x7S1(const float *input, ...@@ -247,5 +247,5 @@ void Conv2dNeonK1x7S1(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
#endif #endif
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
// Ho = 2, Wo = 4, Co = 2 // Ho = 2, Wo = 4, Co = 2
void Conv2dNeonK3x3S1(const float *input, void Conv2dNeonK3x3S1(const float *input,
...@@ -658,5 +658,5 @@ void Conv2dNeonK3x3S2(const float *input, ...@@ -658,5 +658,5 @@ void Conv2dNeonK3x3S2(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
#define MACE_Conv2dNeonK5x5SnLoadCalc4 \ #define MACE_Conv2dNeonK5x5SnLoadCalc4 \
/* load filter (4 outch x 1 height x 4 width) */ \ /* load filter (4 outch x 1 height x 4 width) */ \
...@@ -215,5 +215,5 @@ void Conv2dNeonK5x5S1(const float *input, ...@@ -215,5 +215,5 @@ void Conv2dNeonK5x5S1(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
// Ho = 4, Wo = 1, Co = 4 // Ho = 4, Wo = 1, Co = 4
void Conv2dNeonK7x1S1(const float *input, void Conv2dNeonK7x1S1(const float *input,
...@@ -287,5 +287,5 @@ void Conv2dNeonK7x1S1(const float *input, ...@@ -287,5 +287,5 @@ void Conv2dNeonK7x1S1(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
#define MACE_Conv2dArmv8NeonK7x7SnLoadCalc4 \ #define MACE_Conv2dArmv8NeonK7x7SnLoadCalc4 \
/* load filter (4 outch x 1 height x 4 width) */ \ /* load filter (4 outch x 1 height x 4 width) */ \
...@@ -638,5 +638,5 @@ void Conv2dNeonK7x7S3(const float *input, ...@@ -638,5 +638,5 @@ void Conv2dNeonK7x7S3(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
#include <algorithm> #include <algorithm>
#include "mace/kernels/arm/conv_winograd.h" #include "mace/ops/arm/conv_winograd.h"
#include "mace/kernels/gemm.h" #include "mace/ops/gemm.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
namespace { namespace {
// NCHW => NTCB (T: in tile pixels, B: tile indices) // NCHW => NTCB (T: in tile pixels, B: tile indices)
...@@ -747,5 +747,5 @@ void ConvRef3x3s1(const float *input, ...@@ -747,5 +747,5 @@ void ConvRef3x3s1(const float *input,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,18 +12,18 @@ ...@@ -12,18 +12,18 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ARM_CONV_WINOGRAD_H_ #ifndef MACE_OPS_ARM_CONV_WINOGRAD_H_
#define MACE_KERNELS_ARM_CONV_WINOGRAD_H_ #define MACE_OPS_ARM_CONV_WINOGRAD_H_
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/kernels/sgemm.h" #include "mace/ops/sgemm.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void TransformFilter4x4(const float *filter, void TransformFilter4x4(const float *filter,
const index_t in_channels, const index_t in_channels,
...@@ -70,7 +70,7 @@ void ConvRef3x3s1(const float *input, ...@@ -70,7 +70,7 @@ void ConvRef3x3s1(const float *input,
const index_t out_channels, const index_t out_channels,
float *output); float *output);
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ARM_CONV_WINOGRAD_H_ #endif // MACE_OPS_ARM_CONV_WINOGRAD_H_
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/kernels/arm/conv_winograd.h" #include "mace/ops/arm/conv_winograd.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
TEST(ConvWinogradTest, winograd) { TEST(ConvWinogradTest, winograd) {
index_t batch = 1; index_t batch = 1;
...@@ -62,11 +62,11 @@ TEST(ConvWinogradTest, winograd) { ...@@ -62,11 +62,11 @@ TEST(ConvWinogradTest, winograd) {
return std::max(-1.0f, std::min(1.0f, nd(gen))); return std::max(-1.0f, std::min(1.0f, nd(gen)));
}); });
kernels::ConvRef3x3s1(input_data, filter_data, batch, in_height, in_width, ops::ConvRef3x3s1(input_data, filter_data, batch, in_height, in_width,
in_channels, out_channels, output_data_ref); in_channels, out_channels, output_data_ref);
SGemm sgemm; SGemm sgemm;
kernels::WinoGradConv3x3s1(input_data, filter_data, batch, in_height, ops::WinoGradConv3x3s1(input_data, filter_data, batch, in_height,
in_width, in_channels, out_channels, 6, in_width, in_channels, out_channels, 6,
output_data, &sgemm, nullptr); output_data, &sgemm, nullptr);
...@@ -76,5 +76,5 @@ TEST(ConvWinogradTest, winograd) { ...@@ -76,5 +76,5 @@ TEST(ConvWinogradTest, winograd) {
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ARM_DECONV_2D_NEON_H_ #ifndef MACE_OPS_ARM_DECONV_2D_NEON_H_
#define MACE_KERNELS_ARM_DECONV_2D_NEON_H_ #define MACE_OPS_ARM_DECONV_2D_NEON_H_
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "mace/core/types.h" #include "mace/core/types.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void Deconv2dNeonK3x3S1(const float *input, void Deconv2dNeonK3x3S1(const float *input,
const float *filter, const float *filter,
...@@ -90,7 +90,7 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a, ...@@ -90,7 +90,7 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a,
} }
#endif #endif
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ARM_DECONV_2D_NEON_H_ #endif // MACE_OPS_ARM_DECONV_2D_NEON_H_
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
// limitations under the License. // limitations under the License.
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/kernels/arm/deconv_2d_neon.h" #include "mace/ops/arm/deconv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void Deconv2dNeonK3x3S1(const float *input, void Deconv2dNeonK3x3S1(const float *input,
const float *filter, const float *filter,
...@@ -387,5 +387,5 @@ void Deconv2dNeonK3x3S2(const float *input, ...@@ -387,5 +387,5 @@ void Deconv2dNeonK3x3S2(const float *input,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
// limitations under the License. // limitations under the License.
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/kernels/arm/deconv_2d_neon.h" #include "mace/ops/arm/deconv_2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void Deconv2dNeonK4x4S1(const float *input, void Deconv2dNeonK4x4S1(const float *input,
const float *filter, const float *filter,
...@@ -501,5 +501,5 @@ void Deconv2dNeonK4x4S2(const float *input, ...@@ -501,5 +501,5 @@ void Deconv2dNeonK4x4S2(const float *input,
} }
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,13 +12,13 @@ ...@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ #ifndef MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_
#define MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ #define MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_
#include "mace/core/types.h" #include "mace/core/types.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
void DepthwiseConv2dNeonK3x3S1(const float *input, void DepthwiseConv2dNeonK3x3S1(const float *input,
const float *filter, const float *filter,
...@@ -42,7 +42,7 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, ...@@ -42,7 +42,7 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
const index_t valid_w_stop, const index_t valid_w_stop,
float *output); float *output);
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ARM_DEPTHWISE_CONV2D_NEON_H_ #endif // MACE_OPS_ARM_DEPTHWISE_CONV2D_NEON_H_
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
#endif #endif
#include "mace/core/macros.h" #include "mace/core/macros.h"
#include "mace/kernels/arm/depthwise_conv2d_neon.h" #include "mace/ops/arm/depthwise_conv2d_neon.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
namespace { namespace {
void DepthwiseConv2dPixel(const float *in_base, void DepthwiseConv2dPixel(const float *in_base,
...@@ -381,5 +381,5 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, ...@@ -381,5 +381,5 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
} // b } // b
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -17,13 +17,13 @@ ...@@ -17,13 +17,13 @@
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/batch_norm.h" #include "mace/ops/opencl/image/batch_norm.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class BatchNormOp; class BatchNormOp;
...@@ -35,7 +35,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation { ...@@ -35,7 +35,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
: Operation(context), : Operation(context),
epsilon_(Operation::GetOptionalArg<float>("epsilon", epsilon_(Operation::GetOptionalArg<float>("epsilon",
static_cast<float>(1e-4))), static_cast<float>(1e-4))),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP"))), Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {} relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
...@@ -144,7 +144,7 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation { ...@@ -144,7 +144,7 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
: Operation(context) { : Operation(context) {
float epsilon = Operation::GetOptionalArg<float>( float epsilon = Operation::GetOptionalArg<float>(
"epsilon", static_cast<float>(1e-4)); "epsilon", static_cast<float>(1e-4));
ActivationType activation = kernels::StringToActivationType( ActivationType activation = ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP")); Operation::GetOptionalArg<std::string>("activation", "NOOP"));
float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f); float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
if (context->device()->opencl_runtime()->UseImageMemory()) { if (context->device()->opencl_runtime()->UseImageMemory()) {
...@@ -205,5 +205,5 @@ void RegisterBatchNorm(OpRegistryBase *op_registry) { ...@@ -205,5 +205,5 @@ void RegisterBatchNorm(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -54,15 +53,15 @@ void BatchNorm( ...@@ -54,15 +53,15 @@ void BatchNorm(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Scale", "ScaleImage", BufferToImage<D, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Offset", "OffsetImage", BufferToImage<D, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Mean", "MeanImage", BufferToImage<D, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Var", "VarImage", BufferToImage<D, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormBM") OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("InputImage") .Input("InputImage")
.Input("ScaleImage") .Input("ScaleImage")
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -51,15 +50,15 @@ void Simple() { ...@@ -51,15 +50,15 @@ void Simple() {
net.TransformDataFormat<D, float>("OutputNCHW", NCHW, "Output", NHWC); net.TransformDataFormat<D, float>("OutputNCHW", NCHW, "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Scale", "ScaleImage", BufferToImage<D, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Offset", "OffsetImage", BufferToImage<D, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Mean", "MeanImage", BufferToImage<D, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "Var", "VarImage", BufferToImage<D, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -75,7 +74,7 @@ void Simple() { ...@@ -75,7 +74,7 @@ void Simple() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
// Check // Check
...@@ -135,15 +134,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -135,15 +134,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage", BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage", BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage", BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage", BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -165,7 +164,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -165,7 +164,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4); 1e-5, 1e-4);
} }
...@@ -214,15 +213,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -214,15 +213,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage", BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage", BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage", BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage", BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -245,7 +244,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -245,7 +244,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"),
1e-1, 1e-2); 1e-1, 1e-2);
} }
...@@ -294,15 +293,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -294,15 +293,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage", BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage", BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage", BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage", BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -324,7 +323,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -324,7 +323,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4); 1e-5, 1e-4);
} }
...@@ -373,15 +372,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -373,15 +372,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage", BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage", BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage", BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage", BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -404,7 +403,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -404,7 +403,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"),
1e-1, 1e-2); 1e-1, 1e-2);
} }
......
...@@ -17,11 +17,11 @@ ...@@ -17,11 +17,11 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/batch_to_space.h" #include "mace/ops/opencl/image/batch_to_space.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
class BatchToSpaceOpBase : public Operation { class BatchToSpaceOpBase : public Operation {
public: public:
...@@ -303,5 +303,5 @@ void RegisterBatchToSpaceND(OpRegistryBase *op_registry) { ...@@ -303,5 +303,5 @@ void RegisterBatchToSpaceND(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -42,7 +41,7 @@ void BMBatchToSpace( ...@@ -42,7 +41,7 @@ void BMBatchToSpace(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
......
...@@ -17,13 +17,13 @@ ...@@ -17,13 +17,13 @@
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/bias_add.h" #include "mace/ops/opencl/image/bias_add.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class BiasAddOp; class BiasAddOp;
...@@ -139,5 +139,5 @@ void RegisterBiasAdd(OpRegistryBase *op_registry) { ...@@ -139,5 +139,5 @@ void RegisterBiasAdd(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -47,9 +46,9 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) { ...@@ -47,9 +46,9 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddBM") OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("InputImage") .Input("InputImage")
.Input("BiasImage") .Input("BiasImage")
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -46,9 +45,9 @@ void BiasAddSimple() { ...@@ -46,9 +45,9 @@ void BiasAddSimple() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -60,7 +59,7 @@ void BiasAddSimple() { ...@@ -60,7 +59,7 @@ void BiasAddSimple() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -116,9 +115,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -116,9 +115,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage", BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -131,7 +130,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -131,7 +130,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-5);
} }
...@@ -172,9 +171,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -172,9 +171,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
// Run on opencl // Run on opencl
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage", BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -187,7 +186,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -187,7 +186,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-5);
} }
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
#include <memory> #include <memory>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/opencl/buffer/buffer_inverse_transform.h" #include "mace/ops/opencl/buffer/buffer_inverse_transform.h"
#include "mace/kernels/opencl/image/image_to_buffer.h" #include "mace/ops/opencl/image/image_to_buffer.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class BufferInverseTransformOp; class BufferInverseTransformOp;
...@@ -41,9 +41,9 @@ class BufferInverseTransformOp<DeviceType::GPU, T> : public Operation { ...@@ -41,9 +41,9 @@ class BufferInverseTransformOp<DeviceType::GPU, T> : public Operation {
const Tensor *input = this->Input(0); const Tensor *input = this->Input(0);
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
kernels::BufferType type = ops::BufferType type =
static_cast<kernels::BufferType>(Operation::GetOptionalArg<int>( static_cast<ops::BufferType>(Operation::GetOptionalArg<int>(
"buffer_type", static_cast<int>(kernels::CONV2D_FILTER))); "buffer_type", static_cast<int>(ops::CONV2D_FILTER)));
return kernel_->Compute(context, input, type, return kernel_->Compute(context, input, type,
wino_blk_size_, output); wino_blk_size_, output);
...@@ -63,5 +63,5 @@ void RegisterBufferInverseTransform(OpRegistryBase *op_registry) { ...@@ -63,5 +63,5 @@ void RegisterBufferInverseTransform(OpRegistryBase *op_registry) {
BufferInverseTransformOp, DeviceType::GPU, half); BufferInverseTransformOp, DeviceType::GPU, half);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
......
...@@ -54,103 +54,103 @@ void TestBidirectionTransform(const int type, ...@@ -54,103 +54,103 @@ void TestBidirectionTransform(const int type,
} // namespace } // namespace
TEST(BufferToImageTest, ArgSmall) { TEST(BufferToImageTest, ArgSmall) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {1}); TestBidirectionTransform<DeviceType::GPU, float>(ops::ARGUMENT, {1});
} }
TEST(BufferToImageTest, ArgHalfSmall) { TEST(BufferToImageTest, ArgHalfSmall) {
TestBidirectionTransform<DeviceType::GPU, half>(kernels::ARGUMENT, {11}); TestBidirectionTransform<DeviceType::GPU, half>(ops::ARGUMENT, {11});
} }
TEST(BufferToImageTest, ArgMedium) { TEST(BufferToImageTest, ArgMedium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {11}); TestBidirectionTransform<DeviceType::GPU, float>(ops::ARGUMENT, {11});
} }
TEST(BufferToImageTest, ArgLarge) { TEST(BufferToImageTest, ArgLarge) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {256}); TestBidirectionTransform<DeviceType::GPU, float>(ops::ARGUMENT, {256});
} }
TEST(BufferToImageTest, InputSmallSingleChannel) { TEST(BufferToImageTest, InputSmallSingleChannel) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL, TestBidirectionTransform<DeviceType::GPU, float>(ops::IN_OUT_CHANNEL,
{1, 2, 3, 1}); {1, 2, 3, 1});
} }
TEST(BufferToImageTest, InputSmallMultipleChannel) { TEST(BufferToImageTest, InputSmallMultipleChannel) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL, TestBidirectionTransform<DeviceType::GPU, float>(ops::IN_OUT_CHANNEL,
{1, 2, 3, 3}); {1, 2, 3, 3});
} }
TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) { TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL, TestBidirectionTransform<DeviceType::GPU, float>(ops::IN_OUT_CHANNEL,
{3, 2, 3, 3}); {3, 2, 3, 3});
} }
TEST(BufferToImageTest, InputMedium) { TEST(BufferToImageTest, InputMedium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL, TestBidirectionTransform<DeviceType::GPU, float>(ops::IN_OUT_CHANNEL,
{3, 13, 17, 128}); {3, 13, 17, 128});
} }
TEST(BufferToImageTest, InputLarge) { TEST(BufferToImageTest, InputLarge) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL, TestBidirectionTransform<DeviceType::GPU, float>(ops::IN_OUT_CHANNEL,
{3, 64, 64, 256}); {3, 64, 64, 256});
} }
TEST(BufferToImageTest, Filter1x1Small) { TEST(BufferToImageTest, Filter1x1Small) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{5, 3, 1, 1}); {5, 3, 1, 1});
} }
TEST(BufferToImageTest, Filter1x1Medium) { TEST(BufferToImageTest, Filter1x1Medium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{13, 17, 1, 1}); {13, 17, 1, 1});
} }
TEST(BufferToImageTest, Filter1x1Large) { TEST(BufferToImageTest, Filter1x1Large) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{512, 128, 1, 1}); {512, 128, 1, 1});
} }
TEST(BufferToImageTest, Filter3x3Small) { TEST(BufferToImageTest, Filter3x3Small) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{3, 5, 3, 3}); {3, 5, 3, 3});
} }
TEST(BufferToImageTest, Filter3x3Medium) { TEST(BufferToImageTest, Filter3x3Medium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{17, 13, 3, 3}); {17, 13, 3, 3});
} }
TEST(BufferToImageTest, Filter3x3Large) { TEST(BufferToImageTest, Filter3x3Large) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER, TestBidirectionTransform<DeviceType::GPU, float>(ops::CONV2D_FILTER,
{256, 128, 3, 3}); {256, 128, 3, 3});
} }
TEST(BufferToImageTest, WeightWidthSmall) { TEST(BufferToImageTest, WeightWidthSmall) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_WIDTH, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_WIDTH,
{1, 3, 3, 3}); {1, 3, 3, 3});
} }
TEST(BufferToImageTest, WeightWidthMedium) { TEST(BufferToImageTest, WeightWidthMedium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_WIDTH, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_WIDTH,
{11, 13, 13, 17}); {11, 13, 13, 17});
} }
TEST(BufferToImageTest, WeightWidthLarge) { TEST(BufferToImageTest, WeightWidthLarge) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_WIDTH, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_WIDTH,
{64, 64, 11, 13}); {64, 64, 11, 13});
} }
TEST(BufferToImageTest, WeightHeightSmall) { TEST(BufferToImageTest, WeightHeightSmall) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_HEIGHT, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_HEIGHT,
{2, 1, 1, 1}); {2, 1, 1, 1});
} }
TEST(BufferToImageTest, WeightHeightMedium) { TEST(BufferToImageTest, WeightHeightMedium) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_HEIGHT, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_HEIGHT,
{11, 13, 13, 17}); {11, 13, 13, 17});
} }
TEST(BufferToImageTest, WeightHeightLarge) { TEST(BufferToImageTest, WeightHeightLarge) {
TestBidirectionTransform<DeviceType::GPU, float>(kernels::WEIGHT_HEIGHT, TestBidirectionTransform<DeviceType::GPU, float>(ops::WEIGHT_HEIGHT,
{64, 16, 11, 13}); {64, 16, 11, 13});
} }
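The tests above only change the namespace qualifier: the OpenCL buffer layouts they exercise are now referenced as ops::BufferType instead of kernels::BufferType. For orientation, a sketch of that enum as used by these tests; only the members referenced here are listed, and the ordering is illustrative rather than copied from this commit:

// Sketch of the buffer layouts referenced by the tests above (illustrative only).
namespace mace {
namespace ops {
enum BufferType {
  CONV2D_FILTER,    // filter tensors for Conv2D
  IN_OUT_CHANNEL,   // activations packed along the channel dimension
  ARGUMENT,         // small 1-D arguments such as bias
  WEIGHT_HEIGHT,    // fully-connected weights packed along height
  WEIGHT_WIDTH,     // fully-connected weights packed along width
  // ...the real enum defines additional layouts not shown here...
};
}  // namespace ops
}  // namespace mace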
...@@ -188,7 +188,7 @@ void TestDiffTypeBidirectionTransform(const int type, ...@@ -188,7 +188,7 @@ void TestDiffTypeBidirectionTransform(const int type,
} // namespace } // namespace
TEST(BufferToImageTest, ArgFloatToHalfSmall) { TEST(BufferToImageTest, ArgFloatToHalfSmall) {
TestDiffTypeBidirectionTransform<DeviceType::GPU, half>(kernels::ARGUMENT, TestDiffTypeBidirectionTransform<DeviceType::GPU, half>(ops::ARGUMENT,
{11}); {11});
} }
...@@ -233,7 +233,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { ...@@ -233,7 +233,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) {
const unsigned char input_data[] = { const unsigned char input_data[] = {
0xCD, 0x3C, 0x33, 0x40, 0xCD, 0x3C, 0x33, 0x40,
}; };
TestStringHalfBidirectionTransform<DeviceType::GPU, half>(kernels::ARGUMENT, TestStringHalfBidirectionTransform<DeviceType::GPU, half>(ops::ARGUMENT,
{2}, input_data); {2}, input_data);
} }
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
#include <memory> #include <memory>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/opencl/buffer/buffer_transform.h" #include "mace/ops/opencl/buffer/buffer_transform.h"
#include "mace/kernels/opencl/image/buffer_to_image.h" #include "mace/ops/opencl/image/buffer_to_image.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class BufferTransformOp; class BufferTransformOp;
...@@ -41,9 +41,9 @@ class BufferTransformOp<DeviceType::GPU, T> : public Operation { ...@@ -41,9 +41,9 @@ class BufferTransformOp<DeviceType::GPU, T> : public Operation {
const Tensor *input = this->Input(0); const Tensor *input = this->Input(0);
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
kernels::BufferType type = ops::BufferType type =
static_cast<kernels::BufferType>(Operation::GetOptionalArg<int>( static_cast<ops::BufferType>(Operation::GetOptionalArg<int>(
"buffer_type", static_cast<int>(kernels::CONV2D_FILTER))); "buffer_type", static_cast<int>(ops::CONV2D_FILTER)));
return kernel_->Compute(context, input, type, return kernel_->Compute(context, input, type,
wino_blk_size_, output); wino_blk_size_, output);
...@@ -63,5 +63,5 @@ void RegisterBufferTransform(OpRegistryBase *op_registry) { ...@@ -63,5 +63,5 @@ void RegisterBufferTransform(OpRegistryBase *op_registry) {
BufferTransformOp, DeviceType::GPU, half); BufferTransformOp, DeviceType::GPU, half);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
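The hunk above is representative of the whole move: an op implementation now lives under mace/ops, its classes sit in namespace mace::ops, and it contributes a RegisterXxx(OpRegistryBase *) function that the unified registry calls. A minimal sketch of that layout for a hypothetical new op, assuming the Operation/OpConstructContext/OpContext interfaces visible elsewhere in this diff:

// mace/ops/my_custom_op.cc (hypothetical example, not part of this commit)
#include "mace/core/operator.h"

namespace mace {
namespace ops {

template <DeviceType D, class T>
class MyCustomOp : public Operation {
 public:
  explicit MyCustomOp(OpConstructContext *context) : Operation(context) {}

  MaceStatus Run(OpContext *context) override {
    const Tensor *input = this->Input(0);
    Tensor *output = this->Output(0);
    // ...compute output from input here...
    return MaceStatus::MACE_SUCCESS;
  }
};

void RegisterMyCustomOp(OpRegistryBase *op_registry) {
  MACE_REGISTER_OP(op_registry, "MyCustomOp", MyCustomOp,
                   DeviceType::CPU, float);
}

}  // namespace ops
}  // namespace mace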
...@@ -69,12 +69,12 @@ void TestBidirectionTransform(const int type, ...@@ -69,12 +69,12 @@ void TestBidirectionTransform(const int type,
} // namespace } // namespace
TEST_F(BufferTransformTest, FloatToHalf) { TEST_F(BufferTransformTest, FloatToHalf) {
TestBidirectionTransform<float, half>(kernels::BufferType::IN_OUT_CHANNEL, TestBidirectionTransform<float, half>(ops::BufferType::IN_OUT_CHANNEL,
{1, 2, 3, 4}); {1, 2, 3, 4});
} }
TEST_F(BufferTransformTest, HalfToHalf) { TEST_F(BufferTransformTest, HalfToHalf) {
TestBidirectionTransform<half, half>(kernels::BufferType::IN_OUT_CHANNEL, TestBidirectionTransform<half, half>(ops::BufferType::IN_OUT_CHANNEL,
{1, 2, 3, 4}); {1, 2, 3, 4});
} }
...@@ -85,7 +85,7 @@ void TestArgumentTransform(const index_t input_size) { ...@@ -85,7 +85,7 @@ void TestArgumentTransform(const index_t input_size) {
OpDefBuilder("BufferTransform", "BufferTransformTest") OpDefBuilder("BufferTransform", "BufferTransformTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.AddIntArg("buffer_type", kernels::BufferType::ARGUMENT) .AddIntArg("buffer_type", ops::BufferType::ARGUMENT)
.AddIntArg("T", DataTypeToEnum<T>::value) .AddIntArg("T", DataTypeToEnum<T>::value)
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, typename SrcType> template <DeviceType D, typename SrcType>
class CastOp : public Operation { class CastOp : public Operation {
...@@ -57,5 +57,5 @@ void RegisterCast(OpRegistryBase *op_registry) { ...@@ -57,5 +57,5 @@ void RegisterCast(OpRegistryBase *op_registry) {
DeviceType::CPU, int32_t); DeviceType::CPU, int32_t);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
// limitations under the License. // limitations under the License.
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
......
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/channel_shuffle.h" #include "mace/ops/opencl/image/channel_shuffle.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class ChannelShuffleOp; class ChannelShuffleOp;
...@@ -115,5 +115,5 @@ void RegisterChannelShuffle(OpRegistryBase *op_registry) { ...@@ -115,5 +115,5 @@ void RegisterChannelShuffle(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -44,7 +43,7 @@ void ChannelShuffle( ...@@ -44,7 +43,7 @@ void ChannelShuffle(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
.Input("InputImage") .Input("InputImage")
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -61,7 +60,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { ...@@ -61,7 +60,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31});
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
.Input("InputImage") .Input("InputImage")
...@@ -74,7 +73,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { ...@@ -74,7 +73,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
// Transfer output // Transfer output
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
// Check // Check
auto expected = net.CreateTensor<float>( auto expected = net.CreateTensor<float>(
......
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
#include "mace/utils/quantize.h" #include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/concat.h" #include "mace/ops/opencl/image/concat.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
class ConcatOpBase : public Operation { class ConcatOpBase : public Operation {
public: public:
...@@ -206,6 +206,9 @@ void RegisterConcat(OpRegistryBase *op_registry) { ...@@ -206,6 +206,9 @@ void RegisterConcat(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, float); DeviceType::CPU, float);
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, int32_t);
MACE_REGISTER_OP(op_registry, "Concat", ConcatOp, MACE_REGISTER_OP(op_registry, "Concat", ConcatOp,
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
...@@ -218,5 +221,5 @@ void RegisterConcat(OpRegistryBase *op_registry) { ...@@ -218,5 +221,5 @@ void RegisterConcat(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
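Beyond the namespace move, this hunk also registers Concat for int32_t on the CPU, so graphs that concatenate integer tensors no longer need a cast first. A hedged sketch of what that enables in a test, reusing the OpDefBuilder pattern visible elsewhere in this diff (the "axis" argument name is an assumption):

// Hypothetical test snippet: Concat over int32 inputs on the CPU device.
OpDefBuilder("Concat", "ConcatInt32Test")
    .Input("Input0")
    .Input("Input1")
    .Output("Output")
    .AddIntArg("axis", 0)  // assumed argument name
    .AddIntArg("T", DataTypeToEnum<int32_t>::value)
    .Finalize(net.NewOperatorDef());
net.RunOp(DeviceType::CPU);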
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -90,9 +89,9 @@ void OpenclConcatHelper(int iters, ...@@ -90,9 +89,9 @@ void OpenclConcatHelper(int iters,
net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1); net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1);
BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImage0", BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImage1", BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Concat", "ConcatBM") OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0") .Input("InputImage0")
.Input("InputImage1") .Input("InputImage1")
......
...@@ -262,7 +262,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -262,7 +262,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
net.AddInputFromArray<DeviceType::GPU, float>(input_name, shapes[i], net.AddInputFromArray<DeviceType::GPU, float>(input_name, shapes[i],
inputs[i]); inputs[i]);
BufferToImage<DeviceType::GPU, T>(&net, input_name, image_name, BufferToImage<DeviceType::GPU, T>(&net, input_name, image_name,
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
auto builder = OpDefBuilder("Concat", "ConcatTest"); auto builder = OpDefBuilder("Concat", "ConcatTest");
...@@ -279,7 +279,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -279,7 +279,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
net.RunOp(DeviceType::GPU); net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
// Check // Check
auto output = net.GetOutput("Output"); auto output = net.GetOutput("Output");
......
...@@ -26,20 +26,20 @@ ...@@ -26,20 +26,20 @@
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#include "mace/kernels/arm/conv_2d_neon.h" #include "mace/ops/arm/conv_2d_neon.h"
#include "mace/kernels/arm/conv_winograd.h" #include "mace/ops/arm/conv_winograd.h"
#include "mace/kernels/conv_pool_2d_base.h" #include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/kernels/gemmlowp_util.h" #include "mace/ops/gemmlowp_util.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/conv_2d.h" #include "mace/ops/opencl/image/conv_2d.h"
#include "mace/kernels/opencl/buffer/conv_2d.h" #include "mace/ops/opencl/buffer/conv_2d.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class Conv2dOp; class Conv2dOp;
...@@ -49,7 +49,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase { ...@@ -49,7 +49,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
public: public:
explicit Conv2dOp(OpConstructContext *context) explicit Conv2dOp(OpConstructContext *context)
: ConvPool2dOpBase(context), : ConvPool2dOpBase(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)), relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
...@@ -712,7 +712,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase { ...@@ -712,7 +712,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
public: public:
explicit Conv2dOp(OpConstructContext *context) explicit Conv2dOp(OpConstructContext *context)
: ConvPool2dOpBase(context), : ConvPool2dOpBase(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {} relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
...@@ -950,7 +950,7 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase { ...@@ -950,7 +950,7 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
public: public:
explicit Conv2dOp(OpConstructContext *context) explicit Conv2dOp(OpConstructContext *context)
: ConvPool2dOpBase(context), : ConvPool2dOpBase(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) { relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {
...@@ -999,5 +999,5 @@ void RegisterConv2D(OpRegistryBase *op_registry) { ...@@ -999,5 +999,5 @@ void RegisterConv2D(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
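The conv_2d.cc hunk touches only qualifiers, but it shows the argument pattern the fused conv-style ops in the new ops namespace use to pick up an activation. Condensed from the constructor initializers above:

// Fused-activation arguments, as read in the Conv2dOp constructors above.
activation_ = ops::StringToActivationType(
    Operation::GetOptionalArg<std::string>("activation", "NOOP"));
relux_max_limit_ = Operation::GetOptionalArg<float>("max_limit", 0.0f);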
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
#include <algorithm> #include <algorithm>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -66,11 +65,11 @@ void Conv2d(int iters, ...@@ -66,11 +65,11 @@ void Conv2d(int iters,
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -61,11 +61,11 @@ void TestNHWCSimple3x3VALID() { ...@@ -61,11 +61,11 @@ void TestNHWCSimple3x3VALID() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -81,7 +81,7 @@ void TestNHWCSimple3x3VALID() { ...@@ -81,7 +81,7 @@ void TestNHWCSimple3x3VALID() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -127,11 +127,11 @@ void TestNHWCSimple3x3SAME() { ...@@ -127,11 +127,11 @@ void TestNHWCSimple3x3SAME() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME() { ...@@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -213,9 +213,9 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -213,9 +213,9 @@ void TestNHWCSimple3x3WithoutBias() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -230,7 +230,7 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -230,7 +230,7 @@ void TestNHWCSimple3x3WithoutBias() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(&net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -287,11 +287,11 @@ void TestNHWCCombined3x3() { ...@@ -287,11 +287,11 @@ void TestNHWCCombined3x3() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -307,7 +307,7 @@ void TestNHWCCombined3x3() { ...@@ -307,7 +307,7 @@ void TestNHWCCombined3x3() {
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(&net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -362,11 +362,11 @@ void TestFusedNHWCSimple3x3VALID() { ...@@ -362,11 +362,11 @@ void TestFusedNHWCSimple3x3VALID() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -383,7 +383,7 @@ void TestFusedNHWCSimple3x3VALID() { ...@@ -383,7 +383,7 @@ void TestFusedNHWCSimple3x3VALID() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -425,9 +425,9 @@ void TestFusedNHWCSimple3x3WithoutBias() { ...@@ -425,9 +425,9 @@ void TestFusedNHWCSimple3x3WithoutBias() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -443,7 +443,7 @@ void TestFusedNHWCSimple3x3WithoutBias() { ...@@ -443,7 +443,7 @@ void TestFusedNHWCSimple3x3WithoutBias() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -505,11 +505,11 @@ void TestConv1x1() { ...@@ -505,11 +505,11 @@ void TestConv1x1() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Filter", "FilterImage", BufferToImage<D, float>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, float>(&net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -524,7 +524,7 @@ void TestConv1x1() { ...@@ -524,7 +524,7 @@ void TestConv1x1() {
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -596,11 +596,11 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape, ...@@ -596,11 +596,11 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
// run on gpu // run on gpu
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -616,7 +616,7 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape, ...@@ -616,7 +616,7 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
1e-4); 1e-4);
}; };
...@@ -705,11 +705,11 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -705,11 +705,11 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// run on gpu // run on gpu
BufferToImage<D, half>(&net, "Input", "InputImage", BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Filter", "FilterImage", BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(&net, "Bias", "BiasImage", BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -725,7 +725,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -725,7 +725,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2,
1e-2); 1e-2);
...@@ -857,11 +857,11 @@ void TestDilationConvNxN(const std::vector<index_t> &shape, ...@@ -857,11 +857,11 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
// run on gpu // run on gpu
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -877,7 +877,7 @@ void TestDilationConvNxN(const std::vector<index_t> &shape, ...@@ -877,7 +877,7 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
1e-4); 1e-4);
}; };
...@@ -954,11 +954,11 @@ void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape, ...@@ -954,11 +954,11 @@ void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
// run on gpu // run on gpu
BufferToImage<D, half>(&net, "Input", "InputImage", BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Filter", "FilterImage", BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(&net, "Bias", "BiasImage", BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -974,7 +974,7 @@ void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape, ...@@ -974,7 +974,7 @@ void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2,
1e-1); 1e-1);
}; };
...@@ -1041,11 +1041,11 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, ...@@ -1041,11 +1041,11 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
// run on gpu // run on gpu
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -1060,7 +1060,7 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, ...@@ -1060,7 +1060,7 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
1e-4); 1e-4);
}; };
......
...@@ -12,16 +12,16 @@ ...@@ -12,16 +12,16 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_CONV_POOL_2D_BASE_H_ #ifndef MACE_OPS_CONV_POOL_2D_BASE_H_
#define MACE_KERNELS_CONV_POOL_2D_BASE_H_ #define MACE_OPS_CONV_POOL_2D_BASE_H_
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
class ConvPool2dOpBase : public Operation { class ConvPool2dOpBase : public Operation {
public: public:
...@@ -40,7 +40,7 @@ class ConvPool2dOpBase : public Operation { ...@@ -40,7 +40,7 @@ class ConvPool2dOpBase : public Operation {
std::vector<int> dilations_; std::vector<int> dilations_;
}; };
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_CONV_POOL_2D_BASE_H_ #endif // MACE_OPS_CONV_POOL_2D_BASE_H_
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <vector> #include <vector>
namespace mace { namespace mace {
namespace kernels { namespace ops {
void CalcPaddingAndOutputSize(const index_t *input_shape, void CalcPaddingAndOutputSize(const index_t *input_shape,
const DataFormat input_format, const DataFormat input_format,
...@@ -463,5 +463,5 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input_tensor, ...@@ -463,5 +463,5 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input_tensor,
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_CONV_POOL_2D_UTIL_H_ #ifndef MACE_OPS_CONV_POOL_2D_UTIL_H_
#define MACE_KERNELS_CONV_POOL_2D_UTIL_H_ #define MACE_OPS_CONV_POOL_2D_UTIL_H_
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
...@@ -30,7 +30,7 @@ enum RoundType { ...@@ -30,7 +30,7 @@ enum RoundType {
CEIL = 1, CEIL = 1,
}; };
namespace kernels { namespace ops {
void CalcPaddingAndOutputSize(const index_t *input_shape, void CalcPaddingAndOutputSize(const index_t *input_shape,
const DataFormat input_format, const DataFormat input_format,
...@@ -113,7 +113,7 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input, ...@@ -113,7 +113,7 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input,
Tensor *output_tensor, Tensor *output_tensor,
bool padding_same_value = false); bool padding_same_value = false);
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_ #endif // MACE_OPS_CONV_POOL_2D_UTIL_H_
...@@ -29,7 +29,7 @@ TEST(CoreTest, INIT_MODE) { ...@@ -29,7 +29,7 @@ TEST(CoreTest, INIT_MODE) {
OpDefBuilder("BufferTransform", "BufferTransformTest") OpDefBuilder("BufferTransform", "BufferTransformTest")
.Input("Input") .Input("Input")
.Output("B2IOutput") .Output("B2IOutput")
.AddIntArg("buffer_type", kernels::BufferType::CONV2D_FILTER) .AddIntArg("buffer_type", ops::BufferType::CONV2D_FILTER)
.AddIntArg("mode", static_cast<int>(NetMode::INIT)) .AddIntArg("mode", static_cast<int>(NetMode::INIT))
.Finalize(&op_defs[op_defs.size() - 1]); .Finalize(&op_defs[op_defs.size() - 1]);
...@@ -46,18 +46,16 @@ TEST(CoreTest, INIT_MODE) { ...@@ -46,18 +46,16 @@ TEST(CoreTest, INIT_MODE) {
OpDefBuilder("BufferInverseTransform", "BufferInverseTransformTest") OpDefBuilder("BufferInverseTransform", "BufferInverseTransformTest")
.Input("B2IOutput") .Input("B2IOutput")
.Output("Output") .Output("Output")
.AddIntArg("buffer_type", kernels::BufferType::CONV2D_FILTER) .AddIntArg("buffer_type", ops::BufferType::CONV2D_FILTER)
.Finalize(&op_defs[op_defs.size() - 1]); .Finalize(&op_defs[op_defs.size() - 1]);
NetDef net_def; NetDef net_def;
for (auto &op_def : op_defs) { for (auto &op_def : op_defs) {
net_def.add_op()->CopyFrom(op_def); net_def.add_op()->CopyFrom(op_def);
net_def.add_op_types(op_def.type());
} }
std::shared_ptr<OpDefRegistryBase> op_def_registry(new OpDefRegistry()); std::shared_ptr<OpRegistry> op_registry(new OpRegistry());
std::shared_ptr<OpRegistryBase> op_registry(new OpRegistry());
auto net = std::unique_ptr<NetBase>(new SerialNet( auto net = std::unique_ptr<NetBase>(new SerialNet(
op_def_registry.get(), op_registry.get(), &net_def, &ws, device, op_registry.get(), &net_def, &ws, device,
NetMode::INIT)); NetMode::INIT));
MaceStatus status = net->Init(); MaceStatus status = net->Init();
MACE_CHECK(status == MaceStatus::MACE_SUCCESS); MACE_CHECK(status == MaceStatus::MACE_SUCCESS);
...@@ -67,7 +65,7 @@ TEST(CoreTest, INIT_MODE) { ...@@ -67,7 +65,7 @@ TEST(CoreTest, INIT_MODE) {
EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr); EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr);
EXPECT_TRUE(ws.GetTensor("Output") == nullptr); EXPECT_TRUE(ws.GetTensor("Output") == nullptr);
net = std::unique_ptr<NetBase>(new SerialNet( net = std::unique_ptr<NetBase>(new SerialNet(
op_def_registry.get(), op_registry.get(), &net_def, &ws, device)); op_registry.get(), &net_def, &ws, device));
status = net->Init(); status = net->Init();
MACE_CHECK(status == MaceStatus::MACE_SUCCESS); MACE_CHECK(status == MaceStatus::MACE_SUCCESS);
status = net->Run(); status = net->Run();
......
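The core test above also shows the API change that comes from unifying the registries: SerialNet no longer takes a separate OpDefRegistry, and NetDef no longer carries an op_types list. A condensed sketch of the new construction path, mirroring that test with error handling trimmed:

// Build and run a net against the unified registry (sketch of the test above).
std::shared_ptr<OpRegistry> op_registry(new OpRegistry());
auto net = std::unique_ptr<NetBase>(new SerialNet(
    op_registry.get(), &net_def, &ws, device, NetMode::INIT));
MACE_CHECK(net->Init() == MaceStatus::MACE_SUCCESS);
MACE_CHECK(net->Run() == MaceStatus::MACE_SUCCESS);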
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/crop.h" #include "mace/ops/opencl/image/crop.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class CropOp : public Operation { class CropOp : public Operation {
...@@ -143,5 +143,5 @@ void RegisterCrop(OpRegistryBase *op_registry) { ...@@ -143,5 +143,5 @@ void RegisterCrop(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -81,9 +80,9 @@ void OpenclCropHelper(int iters, ...@@ -81,9 +80,9 @@ void OpenclCropHelper(int iters,
net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1); net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1);
BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImage0", BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImage1", BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Crop", "CropBM") OpDefBuilder("Crop", "CropBM")
.Input("InputImage0") .Input("InputImage0")
.Input("InputImage1") .Input("InputImage1")
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -36,9 +35,9 @@ void RunCrop(const std::vector<index_t> &input_shape, ...@@ -36,9 +35,9 @@ void RunCrop(const std::vector<index_t> &input_shape,
if (D == GPU) { if (D == GPU) {
BufferToImage<D, float>(&net, "Input0", "InputImage0", BufferToImage<D, float>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Input1", "InputImage1", BufferToImage<D, float>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Crop", "CropTest") OpDefBuilder("Crop", "CropTest")
.Input("InputImage0") .Input("InputImage0")
.Input("InputImage1") .Input("InputImage1")
...@@ -69,7 +68,7 @@ void RunCrop(const std::vector<index_t> &input_shape, ...@@ -69,7 +68,7 @@ void RunCrop(const std::vector<index_t> &input_shape,
if (D == GPU) { if (D == GPU) {
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else if (D == CPU) { } else if (D == CPU) {
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW,
"Output", NHWC); "Output", NHWC);
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/deconv_2d.h" #include "mace/ops/deconv_2d.h"
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
...@@ -27,16 +27,16 @@ ...@@ -27,16 +27,16 @@
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#include "mace/kernels/arm/deconv_2d_neon.h" #include "mace/ops/arm/deconv_2d_neon.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/deconv_2d.h" #include "mace/ops/opencl/image/deconv_2d.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
class Deconv2dOpBase : public Operation { class Deconv2dOpBase : public Operation {
public: public:
...@@ -46,9 +46,9 @@ class Deconv2dOpBase : public Operation { ...@@ -46,9 +46,9 @@ class Deconv2dOpBase : public Operation {
padding_type_(static_cast<Padding>(Operation::GetOptionalArg<int>( padding_type_(static_cast<Padding>(Operation::GetOptionalArg<int>(
"padding", static_cast<int>(SAME)))), "padding", static_cast<int>(SAME)))),
paddings_(Operation::GetRepeatedArgs<int>("padding_values")), paddings_(Operation::GetRepeatedArgs<int>("padding_values")),
model_type_(static_cast<kernels::FrameworkType>( model_type_(static_cast<ops::FrameworkType>(
Operation::GetOptionalArg<int>("framework_type", 0))), Operation::GetOptionalArg<int>("framework_type", 0))),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {} relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
...@@ -180,7 +180,7 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase { ...@@ -180,7 +180,7 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
const Tensor *filter = this->Input(1); const Tensor *filter = this->Input(1);
const Tensor *bias = nullptr; const Tensor *bias = nullptr;
const Tensor *output_shape_tensor = nullptr; const Tensor *output_shape_tensor = nullptr;
if (model_type_ == kernels::CAFFE) { if (model_type_ == ops::CAFFE) {
bias = this->InputSize() >= 3 ? this->Input(2) : nullptr; bias = this->InputSize() >= 3 ? this->Input(2) : nullptr;
} else { } else {
output_shape_tensor = output_shape_tensor =
...@@ -491,7 +491,7 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase { ...@@ -491,7 +491,7 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
const Tensor *filter = this->Input(1); const Tensor *filter = this->Input(1);
const Tensor *bias = nullptr; const Tensor *bias = nullptr;
const Tensor *output_shape_tensor = nullptr; const Tensor *output_shape_tensor = nullptr;
if (model_type_ == kernels::CAFFE) { if (model_type_ == ops::CAFFE) {
bias = this->InputSize() >= 3 ? this->Input(2) : nullptr; bias = this->InputSize() >= 3 ? this->Input(2) : nullptr;
} else { } else {
output_shape_tensor = output_shape_tensor =
...@@ -557,5 +557,5 @@ void RegisterDeconv2D(OpRegistryBase *op_registry) { ...@@ -557,5 +557,5 @@ void RegisterDeconv2D(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,18 +12,18 @@ ...@@ -12,18 +12,18 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_DECONV_2D_H_ #ifndef MACE_OPS_DECONV_2D_H_
#define MACE_KERNELS_DECONV_2D_H_ #define MACE_OPS_DECONV_2D_H_
namespace mace { namespace mace {
namespace kernels { namespace ops {
enum FrameworkType { enum FrameworkType {
TENSORFLOW = 0, TENSORFLOW = 0,
CAFFE = 1, CAFFE = 1,
}; };
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_DECONV_2D_H_ #endif // MACE_OPS_DECONV_2D_H_
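mace/ops/deconv_2d.h now carries only the FrameworkType enum, which the deconv ops read from the "framework_type" argument to decide how to interpret their extra inputs. Condensed from the Deconv2dOp hunk above; the output-shape input index is an assumption, since that line is truncated in the diff:

// How FrameworkType steers deconv input parsing (sketch).
ops::FrameworkType model_type = static_cast<ops::FrameworkType>(
    Operation::GetOptionalArg<int>("framework_type", 0));  // 0 == TENSORFLOW
if (model_type == ops::CAFFE) {
  bias = this->InputSize() >= 3 ? this->Input(2) : nullptr;  // optional bias
} else {
  output_shape_tensor = this->Input(2);  // assumed: explicit output-shape tensor
}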
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
#include <algorithm> #include <algorithm>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -54,11 +53,11 @@ static void Deconv2d(int iters, ...@@ -54,11 +53,11 @@ static void Deconv2d(int iters,
{batch, out_h, out_w, output_channels}); {batch, out_h, out_w, output_channels});
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include "mace/kernels/deconv_2d.h" #include "mace/ops/deconv_2d.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -38,7 +38,7 @@ void RunTestSimple(const std::vector<index_t> &input_shape, ...@@ -38,7 +38,7 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
const std::vector<float> &filter_data, const std::vector<float> &filter_data,
const std::vector<index_t> &expected_shape, const std::vector<index_t> &expected_shape,
const std::vector<float> &expected_data, const std::vector<float> &expected_data,
kernels::FrameworkType model_type) { ops::FrameworkType model_type) {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
const index_t batch = input_shape[0]; const index_t batch = input_shape[0];
...@@ -50,12 +50,12 @@ void RunTestSimple(const std::vector<index_t> &input_shape, ...@@ -50,12 +50,12 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
net.TransformDataFormat<D, float>("Filter", HWOI, "FilterOIHW", OIHW); net.TransformDataFormat<D, float>("Filter", HWOI, "FilterOIHW", OIHW);
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "FilterOIHW", "FilterImage", BufferToImage<D, float>(&net, "FilterOIHW", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
if (model_type == kernels::FrameworkType::CAFFE) { if (model_type == ops::FrameworkType::CAFFE) {
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -85,12 +85,12 @@ void RunTestSimple(const std::vector<index_t> &input_shape, ...@@ -85,12 +85,12 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW", net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW); NCHW);
if (model_type == kernels::FrameworkType::CAFFE) { if (model_type == ops::FrameworkType::CAFFE) {
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW") .Input("InputNCHW")
.Input("FilterOIHW") .Input("FilterOIHW")
...@@ -138,7 +138,7 @@ void TestNHWCSimple3x3SAME_S1() { ...@@ -138,7 +138,7 @@ void TestNHWCSimple3x3SAME_S1() {
{4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7, {4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7,
6.5, 6.6, 6.7, 9.5, 9.6, 9.7, 6.5, 6.6, 6.7, 6.5, 6.6, 6.7, 9.5, 9.6, 9.7, 6.5, 6.6, 6.7,
4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7}, 4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
1, Padding::VALID, {2, 2}, 1, Padding::VALID, {2, 2},
{0}, {3, 3, 3, 1}, {0}, {3, 3, 3, 1},
...@@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME_S1() { ...@@ -147,7 +147,7 @@ void TestNHWCSimple3x3SAME_S1() {
{1, 3, 3, 3}, {1, 3, 3, 3},
{4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9,
9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}, 9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4},
kernels::FrameworkType::CAFFE); ops::FrameworkType::CAFFE);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
1, Padding::SAME, {}, 1, Padding::SAME, {},
{1, 3, 3, 3}, {3, 3, 3, 1}, {1, 3, 3, 3}, {3, 3, 3, 1},
...@@ -157,7 +157,7 @@ void TestNHWCSimple3x3SAME_S1() { ...@@ -157,7 +157,7 @@ void TestNHWCSimple3x3SAME_S1() {
{54, 66, 78, 126, 147, 168, 130, 146, 162, {54, 66, 78, 126, 147, 168, 130, 146, 162,
198, 225, 252, 405, 450, 495, 366, 399, 432, 198, 225, 252, 405, 450, 495, 366, 399, 432,
354, 378, 402, 630, 669, 708, 502, 530, 558}, 354, 378, 402, 630, 669, 708, 502, 530, 558},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
1, Padding::SAME, {2, 2}, 1, Padding::SAME, {2, 2},
{0}, {3, 3, 3, 1}, {0}, {3, 3, 3, 1},
...@@ -167,7 +167,7 @@ void TestNHWCSimple3x3SAME_S1() { ...@@ -167,7 +167,7 @@ void TestNHWCSimple3x3SAME_S1() {
{54, 66, 78, 126, 147, 168, 130, 146, 162, {54, 66, 78, 126, 147, 168, 130, 146, 162,
198, 225, 252, 405, 450, 495, 366, 399, 432, 198, 225, 252, 405, 450, 495, 366, 399, 432,
354, 378, 402, 630, 669, 708, 502, 530, 558}, 354, 378, 402, 630, 669, 708, 502, 530, 558},
kernels::FrameworkType::CAFFE); ops::FrameworkType::CAFFE);
} }
template <DeviceType D> template <DeviceType D>
...@@ -185,7 +185,7 @@ void TestNHWCSimple3x3SAME_S2() { ...@@ -185,7 +185,7 @@ void TestNHWCSimple3x3SAME_S2() {
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
2, Padding::SAME, {2, 2}, 2, Padding::SAME, {2, 2},
{0}, {3, 3, 3, 1}, {0}, {3, 3, 3, 1},
...@@ -198,7 +198,7 @@ void TestNHWCSimple3x3SAME_S2() { ...@@ -198,7 +198,7 @@ void TestNHWCSimple3x3SAME_S2() {
1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1},
kernels::FrameworkType::CAFFE); ops::FrameworkType::CAFFE);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
2, Padding::SAME, {}, 2, Padding::SAME, {},
{1, 6, 6, 3}, {3, 3, 3, 1}, {1, 6, 6, 3}, {3, 3, 3, 1},
...@@ -216,7 +216,7 @@ void TestNHWCSimple3x3SAME_S2() { ...@@ -216,7 +216,7 @@ void TestNHWCSimple3x3SAME_S2() {
83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168, 83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168,
304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192, 304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192,
207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0}, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
2, Padding::SAME, {2, 2}, 2, Padding::SAME, {2, 2},
{0}, {3, 3, 3, 1}, {0}, {3, 3, 3, 1},
...@@ -229,7 +229,7 @@ void TestNHWCSimple3x3SAME_S2() { ...@@ -229,7 +229,7 @@ void TestNHWCSimple3x3SAME_S2() {
140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300, 140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300,
142, 155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105, 142, 155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105,
192, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}, 192, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135},
kernels::FrameworkType::CAFFE); ops::FrameworkType::CAFFE);
} }
template <DeviceType D> template <DeviceType D>
...@@ -246,7 +246,7 @@ void TestNHWCSimple3x3SAME_S2_1() { ...@@ -246,7 +246,7 @@ void TestNHWCSimple3x3SAME_S2_1() {
18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18, 18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18,
30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, 30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30,
12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12}, 12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
...@@ -271,7 +271,7 @@ void TestNHWCSimple3x3VALID_S2() { ...@@ -271,7 +271,7 @@ void TestNHWCSimple3x3VALID_S2() {
1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
1, 1, 1}, 1, 1, 1},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
...@@ -288,7 +288,7 @@ void TestNHWCSimple3x3VALID_S1() { ...@@ -288,7 +288,7 @@ void TestNHWCSimple3x3VALID_S1() {
366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402, 366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402,
630, 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147, 630, 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147,
306, 321, 336, 522, 546, 570, 398, 415, 432, 225, 234, 243}, 306, 321, 336, 522, 546, 570, 398, 415, 432, 225, 234, 243},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
...@@ -297,7 +297,7 @@ void TestNHWCSimple2x2SAME() { ...@@ -297,7 +297,7 @@ void TestNHWCSimple2x2SAME() {
{1, 2, 2, 1}, {3, 3, 1, 1}, {1, 2, 2, 1}, {3, 3, 1, 1},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f}, {1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
...@@ -308,7 +308,7 @@ void TestNHWCSimple2x2VALID() { ...@@ -308,7 +308,7 @@ void TestNHWCSimple2x2VALID() {
{1, 5, 5, 1}, {1, 5, 5, 1},
{1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f, {1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f,
2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f}, 2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f},
kernels::FrameworkType::TENSORFLOW); ops::FrameworkType::TENSORFLOW);
} }
} // namespace } // namespace
...@@ -397,11 +397,11 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -397,11 +397,11 @@ void TestComplexDeconvNxNS12(const int batch,
std::vector<int> paddings; std::vector<int> paddings;
std::vector<int> output_shape; std::vector<int> output_shape;
kernels::FrameworkType model_type = ops::FrameworkType model_type =
padding < 0 ? padding < 0 ?
kernels::FrameworkType::TENSORFLOW : kernels::FrameworkType::CAFFE; ops::FrameworkType::TENSORFLOW : ops::FrameworkType::CAFFE;
if (model_type == kernels::FrameworkType::TENSORFLOW) { if (model_type == ops::FrameworkType::TENSORFLOW) {
if (type == Padding::SAME) { if (type == Padding::SAME) {
out_h = (height - 1) * stride_h + 1; out_h = (height - 1) * stride_h + 1;
out_w = (width - 1) * stride_w + 1; out_w = (width - 1) * stride_w + 1;
...@@ -421,7 +421,7 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -421,7 +421,7 @@ void TestComplexDeconvNxNS12(const int batch,
paddings.push_back(padding); paddings.push_back(padding);
} }
if (model_type == kernels::FrameworkType::CAFFE) { if (model_type == ops::FrameworkType::CAFFE) {
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW") .Input("InputNCHW")
.Input("Filter") .Input("Filter")
...@@ -458,13 +458,13 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -458,13 +458,13 @@ void TestComplexDeconvNxNS12(const int batch,
// run on gpu // run on gpu
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); ops::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
if (model_type == kernels::FrameworkType::CAFFE) { if (model_type == ops::FrameworkType::CAFFE) {
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -492,7 +492,7 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -492,7 +492,7 @@ void TestComplexDeconvNxNS12(const int batch,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4, ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
1e-4); 1e-4);
}; };
......
...@@ -17,11 +17,11 @@ ...@@ -17,11 +17,11 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/depth_to_space.h" #include "mace/ops/opencl/image/depth_to_space.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class DepthToSpaceOp : public Operation { class DepthToSpaceOp : public Operation {
...@@ -127,5 +127,5 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) { ...@@ -127,5 +127,5 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -44,7 +43,7 @@ void DepthToSpace( ...@@ -44,7 +43,7 @@ void DepthToSpace(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("DepthToSpace", "DepthToSpaceBM") OpDefBuilder("DepthToSpace", "DepthToSpaceBM")
.Input("InputImage") .Input("InputImage")
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -47,7 +46,7 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape, ...@@ -47,7 +46,7 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape,
} else { } else {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("DepthToSpace", "DepthToSpaceTest") OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -59,7 +58,7 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape, ...@@ -59,7 +58,7 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape,
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} }
auto expected = net.CreateTensor<float>(expected_shape, expected_data); auto expected = net.CreateTensor<float>(expected_shape, expected_data);
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
...@@ -136,7 +135,7 @@ void RandomTest(const int block_size, ...@@ -136,7 +135,7 @@ void RandomTest(const int block_size,
NHWC); NHWC);
BufferToImage<D, T>(&net, "Input", "InputImg", BufferToImage<D, T>(&net, "Input", "InputImg",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("DepthToSpace", "DepthToSpaceTest") OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
.Input("InputImg") .Input("InputImg")
...@@ -149,7 +148,7 @@ void RandomTest(const int block_size, ...@@ -149,7 +148,7 @@ void RandomTest(const int block_size,
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImg", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImg", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DT_FLOAT) { if (DataTypeToEnum<T>::value == DT_FLOAT) {
ExpectTensorNear<float>(*net.GetTensor("Output"), ExpectTensorNear<float>(*net.GetTensor("Output"),
......
...@@ -26,24 +26,24 @@ ...@@ -26,24 +26,24 @@
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#include "mace/kernels/arm/depthwise_conv2d_neon.h" #include "mace/ops/arm/depthwise_conv2d_neon.h"
#include "mace/kernels/conv_pool_2d_base.h" #include "mace/ops/conv_pool_2d_base.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/quantize.h" #include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/depthwise_conv2d.h" #include "mace/ops/opencl/image/depthwise_conv2d.h"
#include "mace/kernels/opencl/buffer/depthwise_conv2d.h" #include "mace/ops/opencl/buffer/depthwise_conv2d.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
class DepthwiseConv2dOpBase : public ConvPool2dOpBase { class DepthwiseConv2dOpBase : public ConvPool2dOpBase {
public: public:
explicit DepthwiseConv2dOpBase(OpConstructContext *context) explicit DepthwiseConv2dOpBase(OpConstructContext *context)
: ConvPool2dOpBase(context), : ConvPool2dOpBase(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {} relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
...@@ -532,5 +532,5 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) { ...@@ -532,5 +532,5 @@ void RegisterDepthwiseConv2d(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
#include <algorithm> #include <algorithm>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -81,11 +80,11 @@ void DepthwiseConv2d(int iters, ...@@ -81,11 +80,11 @@ void DepthwiseConv2d(int iters,
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); ops::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2dTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -53,11 +53,11 @@ void SimpleValidTest() { ...@@ -53,11 +53,11 @@ void SimpleValidTest() {
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); ops::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -73,7 +73,7 @@ void SimpleValidTest() { ...@@ -73,7 +73,7 @@ void SimpleValidTest() {
// Transfer output // Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -150,11 +150,11 @@ void ComplexValidTest(index_t batch, ...@@ -150,11 +150,11 @@ void ComplexValidTest(index_t batch,
"Output", NHWC); "Output", NHWC);
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); ops::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -170,7 +170,7 @@ void ComplexValidTest(index_t batch, ...@@ -170,7 +170,7 @@ void ComplexValidTest(index_t batch,
// Transfer output // Transfer output
ImageToBuffer<D, T>(&net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -290,11 +290,11 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -290,11 +290,11 @@ void TestNxNS12(const index_t height, const index_t width) {
expected->Copy(*net.GetOutput("Output")); expected->Copy(*net.GetOutput("Output"));
BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage", BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::GPU, T>(&net, "Filter", "FilterImage", BufferToImage<DeviceType::GPU, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); ops::BufferType::DW_CONV2D_FILTER);
BufferToImage<DeviceType::GPU, T>(&net, "Bias", "BiasImage", BufferToImage<DeviceType::GPU, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); ops::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -312,7 +312,7 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -312,7 +312,7 @@ void TestNxNS12(const index_t height, const index_t width) {
// Transfer output // Transfer output
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "DeviceOutput", ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "DeviceOutput",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
// Check // Check
if (DataTypeToEnum<T>::value == DT_FLOAT) { if (DataTypeToEnum<T>::value == DT_FLOAT) {
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/kernels/eltwise.h" #include "mace/ops/eltwise.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
...@@ -26,11 +26,11 @@ ...@@ -26,11 +26,11 @@
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/utils/quantize.h" #include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/eltwise.h" #include "mace/ops/opencl/image/eltwise.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
inline index_t GetIndex(const std::vector<index_t> &shape, inline index_t GetIndex(const std::vector<index_t> &shape,
...@@ -792,8 +792,8 @@ class EltwiseOp : public Operation { ...@@ -792,8 +792,8 @@ class EltwiseOp : public Operation {
public: public:
explicit EltwiseOp(OpConstructContext *context) explicit EltwiseOp(OpConstructContext *context)
: Operation(context), : Operation(context),
type_(static_cast<kernels::EltwiseType>(Operation::GetOptionalArg<int>( type_(static_cast<ops::EltwiseType>(Operation::GetOptionalArg<int>(
"type", static_cast<int>(kernels::EltwiseType::NONE)))), "type", static_cast<int>(ops::EltwiseType::NONE)))),
coeff_(Operation::GetRepeatedArgs<float>("coeff")), coeff_(Operation::GetRepeatedArgs<float>("coeff")),
scalar_input_(Operation::GetOptionalArg<float>("scalar_input", 1.0)), scalar_input_(Operation::GetOptionalArg<float>("scalar_input", 1.0)),
scalar_input_index_(Operation::GetOptionalArg<int32_t>( scalar_input_index_(Operation::GetOptionalArg<int32_t>(
...@@ -934,8 +934,8 @@ class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -934,8 +934,8 @@ class EltwiseOp<DeviceType::CPU, uint8_t> : public Operation {
public: public:
explicit EltwiseOp(OpConstructContext *context) explicit EltwiseOp(OpConstructContext *context)
: Operation(context), : Operation(context),
type_(static_cast<kernels::EltwiseType>(Operation::GetOptionalArg<int>( type_(static_cast<ops::EltwiseType>(Operation::GetOptionalArg<int>(
"type", static_cast<int>(kernels::EltwiseType::NONE)))), "type", static_cast<int>(ops::EltwiseType::NONE)))),
coeff_(Operation::GetRepeatedArgs<float>("coeff")), coeff_(Operation::GetRepeatedArgs<float>("coeff")),
scalar_input_(Operation::GetOptionalArg<float>("scalar_input", 1.0)), scalar_input_(Operation::GetOptionalArg<float>("scalar_input", 1.0)),
scalar_input_index_(Operation::GetOptionalArg<int32_t>( scalar_input_index_(Operation::GetOptionalArg<int32_t>(
...@@ -1076,9 +1076,9 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation { ...@@ -1076,9 +1076,9 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
public: public:
explicit EltwiseOp(OpConstructContext *context) explicit EltwiseOp(OpConstructContext *context)
: Operation(context) { : Operation(context) {
EltwiseType type = static_cast<kernels::EltwiseType>( EltwiseType type = static_cast<ops::EltwiseType>(
Operation::GetOptionalArg<int>( Operation::GetOptionalArg<int>(
"type", static_cast<int>(kernels::EltwiseType::NONE))); "type", static_cast<int>(ops::EltwiseType::NONE)));
std::vector<float> coeff = Operation::GetRepeatedArgs<float>("coeff"); std::vector<float> coeff = Operation::GetRepeatedArgs<float>("coeff");
float scalar_input = Operation::GetOptionalArg<float>("scalar_input", 1.0); float scalar_input = Operation::GetOptionalArg<float>("scalar_input", 1.0);
int32_t scalar_input_index = Operation::GetOptionalArg<int32_t>( int32_t scalar_input_index = Operation::GetOptionalArg<int32_t>(
...@@ -1121,5 +1121,5 @@ void RegisterEltwise(OpRegistryBase *op_registry) { ...@@ -1121,5 +1121,5 @@ void RegisterEltwise(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_ELTWISE_H_ #ifndef MACE_OPS_ELTWISE_H_
#define MACE_KERNELS_ELTWISE_H_ #define MACE_OPS_ELTWISE_H_
namespace mace { namespace mace {
namespace kernels { namespace ops {
enum EltwiseType { enum EltwiseType {
SUM = 0, SUM = 0,
...@@ -35,7 +35,7 @@ enum EltwiseType { ...@@ -35,7 +35,7 @@ enum EltwiseType {
inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; } inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_ELTWISE_H_ #endif // MACE_OPS_ELTWISE_H_
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
#include <string> #include <string>
#include "mace/core/op_def_registry.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/eltwise.h" #include "mace/ops/eltwise.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -26,7 +25,7 @@ namespace test { ...@@ -26,7 +25,7 @@ namespace test {
namespace { namespace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
void EltwiseBenchmark( void EltwiseBenchmark(
int iters, kernels::EltwiseType type, int n, int h, int w, int c) { int iters, ops::EltwiseType type, int n, int h, int w, int c) {
mace::testing::StopTiming(); mace::testing::StopTiming();
OpsTestNet net; OpsTestNet net;
...@@ -36,9 +35,9 @@ void EltwiseBenchmark( ...@@ -36,9 +35,9 @@ void EltwiseBenchmark(
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, half>(&net, "Input0", "InputImg0", BufferToImage<D, half>(&net, "Input0", "InputImg0",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Input1", "InputImg1", BufferToImage<D, half>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL); ops::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest") OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg0") .Input("InputImg0")
.Input("InputImg1") .Input("InputImg1")
...@@ -84,7 +83,7 @@ void EltwiseBenchmark( ...@@ -84,7 +83,7 @@ void EltwiseBenchmark(
mace::testing::MaccProcessed(tot); \ mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<DEVICE, TYPE>( \ EltwiseBenchmark<DEVICE, TYPE>( \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \ iters, static_cast<ops::EltwiseType>(ELT_TYPE), N, H, W, C); \
} \ } \
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
......
This diff has been collapsed.
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class ExpandDimsOp; class ExpandDimsOp;
...@@ -67,5 +67,5 @@ void RegisterExpandDims(OpRegistryBase *op_registry) { ...@@ -67,5 +67,5 @@ void RegisterExpandDims(OpRegistryBase *op_registry) {
DeviceType::CPU, uint8_t); DeviceType::CPU, uint8_t);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
// limitations under the License. // limitations under the License.
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
template <DeviceType D, class T> template <DeviceType D, class T>
class FillOp; class FillOp;
...@@ -66,5 +66,5 @@ void RegisterFill(OpRegistryBase *op_registry) { ...@@ -66,5 +66,5 @@ void RegisterFill(OpRegistryBase *op_registry) {
DeviceType::CPU, float); DeviceType::CPU, float);
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/op_def_registry.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_KERNELS_FIXPOINT_H_ #ifndef MACE_OPS_FIXPOINT_H_
#define MACE_KERNELS_FIXPOINT_H_ #define MACE_OPS_FIXPOINT_H_
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "mace/core/types.h" #include "mace/core/types.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
inline uint8_t FindMax(const uint8_t *xs, const index_t size) { inline uint8_t FindMax(const uint8_t *xs, const index_t size) {
uint8_t max_value = 0; uint8_t max_value = 0;
...@@ -57,8 +57,8 @@ inline uint8_t FindMax(const uint8_t *xs, const index_t size) { ...@@ -57,8 +57,8 @@ inline uint8_t FindMax(const uint8_t *xs, const index_t size) {
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_KERNELS_FIXPOINT_H_ #endif // MACE_OPS_FIXPOINT_H_
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include "mace/kernels/fixpoint.h" #include "mace/ops/fixpoint.h"
namespace mace { namespace mace {
namespace kernels { namespace ops {
namespace test { namespace test {
namespace { namespace {
...@@ -49,6 +49,6 @@ TEST(FixpointTest, FindMax) { ...@@ -49,6 +49,6 @@ TEST(FixpointTest, FindMax) {
} }
} // namespace test } // namespace test
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
This diff has been collapsed.
...@@ -19,22 +19,22 @@ ...@@ -19,22 +19,22 @@
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/ops/activation.h"
#include "mace/kernels/gemm.h" #include "mace/ops/gemm.h"
#include "mace/kernels/gemmlowp_util.h" #include "mace/ops/gemmlowp_util.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/kernels/opencl/image/fully_connected.h" #include "mace/ops/opencl/image/fully_connected.h"
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace ops {
class FullyConnectedOpBase : public Operation { class FullyConnectedOpBase : public Operation {
public: public:
explicit FullyConnectedOpBase(OpConstructContext *context) explicit FullyConnectedOpBase(OpConstructContext *context)
: Operation(context), : Operation(context),
activation_(kernels::StringToActivationType( activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", Operation::GetOptionalArg<std::string>("activation",
"NOOP"))), "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {} relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
...@@ -229,5 +229,5 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) { ...@@ -229,5 +229,5 @@ void RegisterFullyConnected(OpRegistryBase *op_registry) {
#endif // MACE_ENABLE_OPENCL #endif // MACE_ENABLE_OPENCL
} }
} // namespace kernels } // namespace ops
} // namespace mace } // namespace mace
49 more file diffs have been collapsed and are not shown.