提交 baf2dcd1 编写于 作者: L Liangliang He

Resolve operator and allocator registering static variable issue

上级 faadb474
...@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0 ...@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_profiling_enabled", "if_embed_binary_program") load("//mace:mace.bzl", "if_android", "if_profiling_enabled", "if_embed_binary_program")
cc_library( cc_library(
name = "opencl_runtime", name = "core",
srcs = glob([ srcs = glob([
"*.cc",
"runtime/opencl/*.cc", "runtime/opencl/*.cc",
]), ]),
hdrs = glob([ hdrs = glob([
"*.h",
"public/*.h",
"runtime/opencl/cl2.hpp", "runtime/opencl/cl2.hpp",
"runtime/opencl/*.h", "runtime/opencl/*.h",
]), ]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]) + if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]) +
if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]), if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]),
linkopts = ["-ldl"], linkopts = if_android(["-pie", "-ldl"]),
deps = [ deps = [
":core", "//mace/utils:utils_hdrs",
"//mace/utils:logging", "//mace/utils:logging",
"//mace/utils:tuner", "//mace/utils:tuner",
"@opencl_headers//:opencl20_headers", "@opencl_headers//:opencl20_headers",
], ],
alwayslink = 1,
)
cc_library(
name = "core",
srcs = glob(["*.cc"]),
hdrs = glob(["*.h", "public/*.h"]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android(["-pie"]),
deps = [
"//mace/utils:utils_hdrs",
"//mace/utils:logging",
],
) )
cc_library( cc_library(
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
namespace mace { namespace mace {
...@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) { ...@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
} // namespace mace } // namespace mace
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const { ...@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const {
// Mace Engine // Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type): MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type):
device_type_(device_type), ws_(new Workspace()), net_(nullptr) { op_registry_(new OperatorRegistry()), device_type_(device_type),
ws_(new Workspace()), net_(nullptr) {
ws_->LoadModelTensor(*net_def, device_type); ws_->LoadModelTensor(*net_def, device_type);
// Init model // Init model
auto net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT); auto net = CreateNet(op_registry_, *net_def, ws_.get(),
device_type, NetMode::INIT);
if(!net->Run()) { if(!net->Run()) {
LOG(FATAL) << "Net init run failed"; LOG(FATAL) << "Net init run failed";
} }
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
net_ = std::move(CreateNet(*net_def, ws_.get(), device_type)); net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
} }
MaceEngine::~MaceEngine() = default; MaceEngine::~MaceEngine() = default;
bool MaceEngine::Run(const float *input, bool MaceEngine::Run(const float *input,
......
...@@ -3,22 +3,24 @@ ...@@ -3,22 +3,24 @@
// //
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
namespace mace { namespace mace {
NetBase::NetBase(const std::shared_ptr<const NetDef> &net_def, NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws, Workspace *ws,
DeviceType type) DeviceType type)
: name_(net_def->name()) {} : op_registry_(op_registry), name_(net_def->name()) {}
SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def, SimpleNet::SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws, Workspace *ws,
DeviceType type, DeviceType type,
const NetMode mode) const NetMode mode)
: NetBase(net_def, ws, type), device_type_(type){ : NetBase(op_registry, net_def, ws, type),
device_type_(type) {
VLOG(1) << "Constructing SimpleNet " << net_def->name(); VLOG(1) << "Constructing SimpleNet " << net_def->name();
for (int idx = 0; idx < net_def->op_size(); ++idx) { for (int idx = 0; idx < net_def->op_size(); ++idx) {
const auto &operator_def = net_def->op(idx); const auto &operator_def = net_def->op(idx);
...@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def, ...@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
<< operator_def.type(); << operator_def.type();
std::unique_ptr<OperatorBase> op{nullptr}; std::unique_ptr<OperatorBase> op{nullptr};
OperatorDef temp_def(operator_def); OperatorDef temp_def(operator_def);
op = CreateOperator(temp_def, ws, type, mode); op = op_registry->CreateOperator(temp_def, ws, type, mode);
if (op) { if (op) {
operators_.emplace_back(std::move(op)); operators_.emplace_back(std::move(op));
} }
...@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { ...@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
} }
if (run_metadata != nullptr) { if (run_metadata != nullptr) {
OperatorStats op_stats = { op->debug_def().name(), OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(),
op->debug_def().type(), call_stats};
call_stats };
run_metadata->op_stats.emplace_back(op_stats); run_metadata->op_stats.emplace_back(op_stats);
} }
...@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { ...@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
return true; return true;
} }
unique_ptr<NetBase> CreateNet(const NetDef &net_def, std::unique_ptr<NetBase> CreateNet(
Workspace *ws, const std::shared_ptr<const OperatorRegistry> op_registry,
DeviceType type, const NetDef &net_def,
const NetMode mode) { Workspace *ws,
DeviceType type,
const NetMode mode) {
std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def)); std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
return CreateNet(tmp_net_def, ws, type, mode); return CreateNet(op_registry, tmp_net_def, ws, type, mode);
} }
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef> &net_def, std::unique_ptr<NetBase> CreateNet(
Workspace *ws, const std::shared_ptr<const OperatorRegistry> op_registry,
DeviceType type, const std::shared_ptr<const NetDef> net_def,
const NetMode mode) { Workspace *ws,
unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type, mode)); DeviceType type,
const NetMode mode) {
unique_ptr<NetBase> net(new SimpleNet(op_registry, net_def, ws, type, mode));
return net; return net;
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define MACE_CORE_NET_H_ #define MACE_CORE_NET_H_
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
namespace mace { namespace mace {
...@@ -16,7 +17,8 @@ class Workspace; ...@@ -16,7 +17,8 @@ class Workspace;
class NetBase { class NetBase {
public: public:
NetBase(const std::shared_ptr<const NetDef> &net_def, NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws, Workspace *ws,
DeviceType type); DeviceType type);
virtual ~NetBase() noexcept {} virtual ~NetBase() noexcept {}
...@@ -27,13 +29,15 @@ class NetBase { ...@@ -27,13 +29,15 @@ class NetBase {
protected: protected:
string name_; string name_;
const std::shared_ptr<const OperatorRegistry> op_registry_;
DISABLE_COPY_AND_ASSIGN(NetBase); DISABLE_COPY_AND_ASSIGN(NetBase);
}; };
class SimpleNet : public NetBase { class SimpleNet : public NetBase {
public: public:
SimpleNet(const std::shared_ptr<const NetDef> &net_def, SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws, Workspace *ws,
DeviceType type, DeviceType type,
const NetMode mode = NetMode::NORMAL); const NetMode mode = NetMode::NORMAL);
...@@ -47,14 +51,18 @@ class SimpleNet : public NetBase { ...@@ -47,14 +51,18 @@ class SimpleNet : public NetBase {
DISABLE_COPY_AND_ASSIGN(SimpleNet); DISABLE_COPY_AND_ASSIGN(SimpleNet);
}; };
unique_ptr<NetBase> CreateNet(const NetDef &net_def, std::unique_ptr<NetBase> CreateNet(
Workspace *ws, const std::shared_ptr<const OperatorRegistry> op_registry,
DeviceType type, const NetDef &net_def,
const NetMode mode = NetMode::NORMAL); Workspace *ws,
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef> &net_def, DeviceType type,
Workspace *ws, const NetMode mode = NetMode::NORMAL);
DeviceType type, std::unique_ptr<NetBase> CreateNet(
const NetMode mode = NetMode::NORMAL); const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type,
const NetMode mode = NetMode::NORMAL);
} // namespace mace } // namespace mace
......
...@@ -2,12 +2,19 @@ ...@@ -2,12 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include <sstream>
#include "mace/core/operator.h" #include "mace/core/operator.h"
namespace mace { namespace mace {
// Constructs the operator base from its definition and owning workspace.
//
// operator_def: copied into a freshly allocated OperatorDef that is held via
//               std::make_shared, so the operator keeps its own copy rather
//               than referring back to the caller's object.
// ws:           stored as-is in operator_ws_; this constructor does not take
//               ownership of it or dereference it.
OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
: operator_ws_(ws),
operator_def_(std::make_shared<OperatorDef>(operator_def)) {}
OpKeyBuilder::OpKeyBuilder(const char *op_name): op_name_(op_name) {} OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {}
// Records the device the key is being built for and returns this builder so
// that further calls can be chained onto the result.
//
// device: device type stored in device_type_ and later streamed into the key
//         by Build().
//
// Returns a reference to *this.
OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) {
  device_type_ = device;
  // The function is declared to return a reference; flowing off the end
  // without a return statement is undefined behavior, and callers chain
  // .TypeConstraint(...).Build() directly onto this result.
  return *this;
}
OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const DataType allowed) { const DataType allowed) {
...@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, ...@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const std::string OpKeyBuilder::Build() { const std::string OpKeyBuilder::Build() {
static const std::vector<std::string> type_order = {"T"}; static const std::vector<std::string> type_order = {"T"};
std::string key = op_name_; std::stringstream ss;
ss << op_name_;
ss << device_type_;
for (auto type : type_order) { for (auto type : type_order) {
key += type + "_" + DataTypeToString(type_constraint_[type]); ss << type << "_" << DataTypeToString(type_constraint_[type]);
} }
return key;
}
std::map<int32_t, OperatorRegistry *> *gDeviceTypeRegistry() { return ss.str();
static std::map<int32_t, OperatorRegistry *> g_device_type_registry;
return &g_device_type_registry;
} }
MACE_DEFINE_REGISTRY(CPUOperatorRegistry, std::unique_ptr<OperatorBase> OperatorRegistry::CreateOperator(
OperatorBase, const OperatorDef &operator_def,
const OperatorDef &, Workspace *ws,
Workspace *); DeviceType type,
MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry); const NetMode mode) const {
const int dtype = ArgumentHelper::GetSingleArgument<OperatorDef, int>(
MACE_DEFINE_REGISTRY(NEONOperatorRegistry, operator_def, "T", static_cast<int>(DT_FLOAT));
OperatorBase, const int op_mode_i = ArgumentHelper::GetSingleArgument<OperatorDef, int>(
const OperatorDef &, operator_def, "mode", static_cast<int>(NetMode::NORMAL));
Workspace *);
MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
MACE_DEFINE_REGISTRY(OPENCLOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
MACE_REGISTER_DEVICE_TYPE(DeviceType::OPENCL, OPENCLOperatorRegistry);
unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
Workspace *ws,
DeviceType type,
const NetMode mode) {
OperatorRegistry *registry = gDeviceTypeRegistry()->at(type);
const int dtype = ArgumentHelper::GetSingleArgument<OperatorDef, int>(operator_def,
"T",
static_cast<int>(DT_FLOAT));
const int op_mode_i= ArgumentHelper::GetSingleArgument<OperatorDef, int>(operator_def,
"mode",
static_cast<int>(NetMode::NORMAL));
const NetMode op_mode = static_cast<NetMode>(op_mode_i); const NetMode op_mode = static_cast<NetMode>(op_mode_i);
if (op_mode == mode) { if (op_mode == mode) {
return registry->Create(OpKeyBuilder(operator_def.type().data()) return registry_.Create(
.TypeConstraint("T", static_cast<DataType>(dtype)) OpKeyBuilder(operator_def.type().data())
.Build(), .Device(type)
operator_def, .TypeConstraint("T", static_cast<DataType>(dtype))
ws); .Build(),
operator_def, ws);
} else { } else {
return nullptr; return nullptr;
} }
} }
OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) extern void Register_AddN(OperatorRegistry *op_registry);
: operator_ws_(ws), extern void Register_BatchNorm(OperatorRegistry *op_registry);
operator_def_(std::make_shared<OperatorDef>(operator_def)) {} extern void Register_BatchToSpaceND(OperatorRegistry *op_registry);
extern void Register_BiasAdd(OperatorRegistry *op_registry);
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ChannelShuffle(OperatorRegistry *op_registry);
extern void Register_Concat(OperatorRegistry *op_registry);
extern void Register_Conv2D(OperatorRegistry *op_registry);
extern void Register_DepthwiseConv2d(OperatorRegistry *op_registry);
extern void Register_FusedConv2D(OperatorRegistry *op_registry);
extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
extern void Register_Pooling(OperatorRegistry *op_registry);
extern void Register_Relu(OperatorRegistry *op_registry);
extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
extern void Register_SpaceToBatchND(OperatorRegistry *op_registry);
// Fills a newly constructed registry by calling every operator's Register_*
// hook with this instance. Hooks run in the order they appear in the table.
OperatorRegistry::OperatorRegistry() {
  typedef void (*RegisterHook)(OperatorRegistry *);

  const RegisterHook hooks[] = {
      Register_AddN,
      Register_BatchNorm,
      Register_BatchToSpaceND,
      Register_BiasAdd,
      Register_BufferToImage,
      Register_ChannelShuffle,
      Register_Concat,
      Register_Conv2D,
      Register_DepthwiseConv2d,
      Register_FusedConv2D,
      Register_GlobalAvgPooling,
      Register_ImageToBuffer,
      Register_Pooling,
      Register_Relu,
      Register_ResizeBilinear,
      Register_SpaceToBatchND,
  };

  for (RegisterHook hook : hooks) {
    hook(this);
  }
}
} // namespace mace } // namespace mace
...@@ -5,13 +5,13 @@ ...@@ -5,13 +5,13 @@
#ifndef MACE_CORE_OPERATOR_H #ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H #define MACE_CORE_OPERATOR_H
#include "mace/core/common.h"
#include "mace/core/arg_helper.h" #include "mace/core/arg_helper.h"
#include "mace/core/common.h"
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/public/mace.h"
#include "mace/core/registry.h" #include "mace/core/registry.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/core/public/mace.h"
namespace mace { namespace mace {
...@@ -102,7 +102,7 @@ class Operator : public OperatorBase { ...@@ -102,7 +102,7 @@ class Operator : public OperatorBase {
} }
} }
} }
virtual bool Run(StatsFuture *future) override = 0; virtual bool Run(StatsFuture *future) override = 0;
~Operator() noexcept override {} ~Operator() noexcept override {}
}; };
...@@ -122,29 +122,12 @@ class Operator : public OperatorBase { ...@@ -122,29 +122,12 @@ class Operator : public OperatorBase {
#define OP_OUTPUT_TAGS(first_input, ...) \ #define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ } enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>
OperatorRegistry;
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *> *(
*RegistryFunction)();
std::map<int32_t, OperatorRegistry *> *gDeviceTypeRegistry();
struct DeviceTypeRegisterer {
explicit DeviceTypeRegisterer(int32_t type, RegistryFunction func) {
if (gDeviceTypeRegistry()->count(type)) {
LOG(ERROR) << "Device type " << type
<< "registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?";
std::exit(1);
}
// Calling the registry function to get the actual registry pointer.
gDeviceTypeRegistry()->emplace(type, func());
}
};
class OpKeyBuilder { class OpKeyBuilder {
public: public:
explicit OpKeyBuilder(const char *op_name); explicit OpKeyBuilder(const char *op_name);
OpKeyBuilder &Device(DeviceType device);
OpKeyBuilder &TypeConstraint(const char *attr_name, const DataType allowed); OpKeyBuilder &TypeConstraint(const char *attr_name, const DataType allowed);
template <typename T> template <typename T>
...@@ -154,6 +137,7 @@ class OpKeyBuilder { ...@@ -154,6 +137,7 @@ class OpKeyBuilder {
private: private:
std::string op_name_; std::string op_name_;
DeviceType device_type_;
std::map<std::string, DataType> type_constraint_; std::map<std::string, DataType> type_constraint_;
}; };
...@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) { ...@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
return this->TypeConstraint(attr_name, DataTypeToEnum<T>::value); return this->TypeConstraint(attr_name, DataTypeToEnum<T>::value);
} }
// Per-instance operator registry: wraps a string-keyed Registry of
// OperatorBase creators and builds operators from their definitions.
class OperatorRegistry {
 public:
  // Underlying registry type: string key -> creator taking
  // (const OperatorDef &, Workspace *).
  using RegistryType =
      Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>;

  OperatorRegistry();
  ~OperatorRegistry() = default;

  // Gives mutable access to the wrapped registry (used by REGISTER_OPERATOR
  // to add creators).
  RegistryType *registry() { return &registry_; }

  // Creates the operator described by operator_def for the given device
  // type and net mode; defined in the corresponding .cc file.
  std::unique_ptr<OperatorBase> CreateOperator(
      const OperatorDef &operator_def,
      Workspace *ws,
      DeviceType type,
      const NetMode mode) const;

 private:
  RegistryType registry_;

  DISABLE_COPY_AND_ASSIGN(OperatorRegistry);
};
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \ MACE_DECLARE_REGISTRY(OpRegistry,
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \
type, &registry_function); \
}
MACE_DECLARE_REGISTRY(CPUOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY(NEONOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY(OPENCLOperatorRegistry,
OperatorBase, OperatorBase,
const OperatorDef &, const OperatorDef &,
Workspace *); Workspace *);
#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \ #define REGISTER_OPERATOR(op_registry, name, ...) \
MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__) MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__)
#define REGISTER_OPENCL_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__)
unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
Workspace *ws,
DeviceType type,
const NetMode mode);
} // namespace mace } // namespace mace
......
...@@ -302,10 +302,12 @@ class NetDef { ...@@ -302,10 +302,12 @@ class NetDef {
class Workspace; class Workspace;
class NetBase; class NetBase;
class OperatorRegistry;
class MaceEngine { class MaceEngine {
public: public:
explicit MaceEngine(const NetDef *net_def, DeviceType device_type); explicit MaceEngine(const NetDef *net_def,
DeviceType device_type);
~MaceEngine(); ~MaceEngine();
bool Run(const float *input, bool Run(const float *input,
const std::vector<int64_t> &input_shape, const std::vector<int64_t> &input_shape,
...@@ -314,6 +316,7 @@ class MaceEngine { ...@@ -314,6 +316,7 @@ class MaceEngine {
MaceEngine &operator=(const MaceEngine&) = delete; MaceEngine &operator=(const MaceEngine&) = delete;
private: private:
std::shared_ptr<OperatorRegistry> op_registry_;
DeviceType device_type_; DeviceType device_type_;
std::unique_ptr<Workspace> ws_; std::unique_ptr<Workspace> ws_;
std::unique_ptr<NetBase> net_; std::unique_ptr<NetBase> net_;
......
...@@ -17,24 +17,27 @@ class Registry { ...@@ -17,24 +17,27 @@ class Registry {
Registry() : registry_() {} Registry() : registry_() {}
void Register(const SrcType &key, Creator creator) { void Register(const SrcType &key, Creator creator) {
VLOG(2) << "Registering: " << key;
std::lock_guard<std::mutex> lock(register_mutex_); std::lock_guard<std::mutex> lock(register_mutex_);
MACE_CHECK(registry_.count(key) == 0, "Key already registered."); MACE_CHECK(registry_.count(key) == 0, "Key already registered.");
registry_[key] = creator; registry_[key] = creator;
} }
inline bool Has(const SrcType &key) { return registry_.count(key) != 0; } inline bool Has(const SrcType &key) const {
return registry_.count(key) != 0;
}
unique_ptr<ObjectType> Create(const SrcType &key, Args... args) { unique_ptr<ObjectType> Create(const SrcType &key, Args... args) const {
if (registry_.count(key) == 0) { if (registry_.count(key) == 0) {
LOG(FATAL) << "Key not registered: " << key; LOG(FATAL) << "Key not registered: " << key;
} }
return registry_[key](args...); return registry_.at(key)(args...);
} }
/** /**
* Returns the keys currently registered as a vector. * Returns the keys currently registered as a vector.
*/ */
vector<SrcType> Keys() { vector<SrcType> Keys() const {
vector<SrcType> keys; vector<SrcType> keys;
for (const auto &it : registry_) { for (const auto &it : registry_) {
keys.push_back(it.first); keys.push_back(it.first);
...@@ -77,39 +80,31 @@ class Registerer { ...@@ -77,39 +80,31 @@ class Registerer {
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \ typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName; Registerer##RegistryName;
/*
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ #define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \ Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \ static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \ new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \ return registry; \
} }
*/
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ #define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__) ##__VA_ARGS__)
/*
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ #define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__) ##__VA_ARGS__)
*/
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \ #define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \
namespace { \ Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
key, RegistryName(), __VA_ARGS__);
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \ #define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__) MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__)
} // namespace mace } // namespace mace
......
...@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) { ...@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
bool OpenCLAllocator::OnHost() { return false; } bool OpenCLAllocator::OnHost() { return false; }
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
} // namespace mace } // namespace mace
...@@ -105,7 +105,8 @@ class Tensor { ...@@ -105,7 +105,8 @@ class Tensor {
inline index_t dim_size() const { return shape_.size(); } inline index_t dim_size() const { return shape_.size(); }
inline index_t dim(unsigned int index) const { inline index_t dim(unsigned int index) const {
MACE_CHECK(index < shape_.size(), "Exceeding ndim limit"); MACE_CHECK(index < shape_.size(), "Dim out of range: ",
index, " >= ", shape_.size());
return shape_[index]; return shape_[index];
} }
......
...@@ -11,7 +11,6 @@ cc_binary( ...@@ -11,7 +11,6 @@ cc_binary(
deps = [ deps = [
"//mace/core", "//mace/core",
"//mace/ops", "//mace/ops",
"//mace/core:opencl_runtime",
], ],
) )
......
...@@ -26,7 +26,6 @@ cc_library( ...@@ -26,7 +26,6 @@ cc_library(
linkopts = if_android(["-lm"]), linkopts = if_android(["-lm"]),
deps = [ deps = [
"//mace/core", "//mace/core",
"//mace/core:opencl_runtime",
"//mace/utils:utils_hdrs", "//mace/utils:utils_hdrs",
], ],
) )
......
...@@ -6,26 +6,32 @@ ...@@ -6,26 +6,32 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN") void Register_AddN(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Build(), .Device(DeviceType::CPU)
AddNOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
AddNOp<DeviceType::NEON, float>); .Build(),
AddNOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
AddNOp<DeviceType::OPENCL, float>); .Build(),
AddNOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
AddNOp<DeviceType::OPENCL, half>); .Build(),
AddNOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { ...@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
for (int i = 0; i < inputs; ++i) { for (int i = 0; i < inputs; ++i) {
net.AddRandomInput<D, float>( net.AddRandomInput<D, float>(internal::MakeString("Input", i).c_str(),
internal::MakeString("Input", i).c_str(), {n, h, w, c}); {n, h, w, c});
} }
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
...@@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { ...@@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
op_def_builder.Input(internal::MakeString("InputImage", i).c_str()); op_def_builder.Input(internal::MakeString("InputImage", i).c_str());
} }
op_def_builder.Output("OutputImage") op_def_builder.Output("OutputImage")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else { } else {
OpDefBuilder op_def_builder("AddN", "AddNBM"); OpDefBuilder op_def_builder("AddN", "AddNBM");
for (int i = 0; i < inputs; ++i) { for (int i = 0; i < inputs; ++i) {
op_def_builder.Input(internal::MakeString("Input", i).c_str()); op_def_builder.Input(internal::MakeString("Input", i).c_str());
} }
op_def_builder.Output("Output") op_def_builder.Output("Output")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} }
// Warm-up // Warm-up
......
...@@ -6,26 +6,32 @@ ...@@ -6,26 +6,32 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm") void Register_BatchNorm(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Build(), .Device(DeviceType::CPU)
BatchNormOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
BatchNormOp<DeviceType::NEON, float>); .Build(),
BatchNormOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
BatchNormOp<DeviceType::OPENCL, float>); .Build(),
BatchNormOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
BatchNormOp<DeviceType::OPENCL, half>); .Build(),
BatchNormOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -23,11 +23,16 @@ static void BatchNorm( ...@@ -23,11 +23,16 @@ static void BatchNorm(
net.AddRandomInput<D, T>("Var", {channels}, true); net.AddRandomInput<D, T>("Var", {channels}, true);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
BufferToImage<D, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Scale", "ScaleImage",
BufferToImage<D, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormBM") OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("InputImage") .Input("InputImage")
.Input("ScaleImage") .Input("ScaleImage")
...@@ -37,8 +42,7 @@ static void BatchNorm( ...@@ -37,8 +42,7 @@ static void BatchNorm(
.AddFloatArg("epsilon", 1e-3) .AddFloatArg("epsilon", 1e-3)
.Output("Output") .Output("Output")
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} } else {
else {
OpDefBuilder("BatchNorm", "BatchNormBM") OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("Input") .Input("Input")
.Input("Scale") .Input("Scale")
...@@ -50,7 +54,6 @@ static void BatchNorm( ...@@ -50,7 +54,6 @@ static void BatchNorm(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} }
// tuning // tuning
setenv("MACE_TUNING", "1", 1); setenv("MACE_TUNING", "1", 1);
net.RunOp(D); net.RunOp(D);
...@@ -79,9 +82,8 @@ static void BatchNorm( ...@@ -79,9 +82,8 @@ static void BatchNorm(
} \ } \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BATCH_NORM(N, C, H, W, TYPE) \ #define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL); BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM(1, 1, 512, 512, float); BM_BATCH_NORM(1, 1, 512, 512, float);
......
...@@ -15,18 +15,23 @@ void Simple() { ...@@ -15,18 +15,23 @@ void Simple() {
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {1, 6, 2, 1}, net.AddInputFromArray<D, float>("Input", {1, 6, 2, 1},
{5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15});
net.AddInputFromArray<D, float>("Scale", {1}, {4.0f}); net.AddInputFromArray<D, float>("Scale", {1}, {4.0f});
net.AddInputFromArray<D, float>("Offset", {1}, {2.0}); net.AddInputFromArray<D, float>("Offset", {1}, {2.0});
net.AddInputFromArray<D, float>("Mean", {1}, {10}); net.AddInputFromArray<D, float>("Mean", {1}, {10});
net.AddInputFromArray<D, float>("Var", {1}, {11.67f}); net.AddInputFromArray<D, float>("Var", {1}, {11.67f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
BufferToImage<D, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Scale", "ScaleImage",
BufferToImage<D, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -41,7 +46,8 @@ void Simple() { ...@@ -41,7 +46,8 @@ void Simple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("Input") .Input("Input")
...@@ -64,9 +70,7 @@ void Simple() { ...@@ -64,9 +70,7 @@ void Simple() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2);
} }
TEST_F(BatchNormOpTest, SimpleCPU) { TEST_F(BatchNormOpTest, SimpleCPU) { Simple<DeviceType::CPU>(); }
Simple<DeviceType::CPU>();
}
/* /*
TEST_F(BatchNormOpTest, SimpleNEON) { TEST_F(BatchNormOpTest, SimpleNEON) {
...@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) { ...@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) {
} }
*/ */
TEST_F(BatchNormOpTest, SimpleOPENCL) { TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::OPENCL>(); }
Simple<DeviceType::OPENCL>();
}
/* /*
TEST_F(BatchNormOpTest, SimpleRandomNeon) { TEST_F(BatchNormOpTest, SimpleRandomNeon) {
...@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) { ...@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) { ...@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels}); net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
...@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
...@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage",
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
...@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
...@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels}); net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
...@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage",
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage") .Input("InputImage")
...@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
} }
...@@ -6,13 +6,17 @@ ...@@ -6,13 +6,17 @@
namespace mace { namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Build(), .Device(DeviceType::OPENCL)
BatchToSpaceNDOp<DeviceType::OPENCL, float>); .TypeConstraint<float>("T")
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") .Build(),
.TypeConstraint<half>("T") BatchToSpaceNDOp<DeviceType::OPENCL, float>);
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
BatchToSpaceNDOp<DeviceType::OPENCL, half>); .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -14,7 +14,8 @@ static void BMBatchToSpace( ...@@ -14,7 +14,8 @@ static void BMBatchToSpace(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -36,16 +37,17 @@ static void BMBatchToSpace( ...@@ -36,16 +37,17 @@ static void BMBatchToSpace(
} }
#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \ #define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \
static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ static void \
int iters) { \ BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ int iters) { \
mace::testing::ItemsProcessed(tot); \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::ItemsProcessed(tot); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \ BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ #define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL); BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL);
BM_BATCH_TO_SPACE(128, 8, 8, 128, 2, float); BM_BATCH_TO_SPACE(128, 8, 8, 128, 2, float);
......
...@@ -6,28 +6,34 @@ ...@@ -6,28 +6,34 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("BiasAdd") void Register_BiasAdd(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Build(), .Device(DeviceType::CPU)
BiasAddOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::CPU, float>);
/* /*
#if __ARM_NEON #if __ARM_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd") REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
BiasAddOp<DeviceType::NEON, float>); .Build(),
#endif // __ARM_NEON BiasAddOp<DeviceType::NEON, float>);
*/ #endif // __ARM_NEON
*/
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
BiasAddOp<DeviceType::OPENCL, float>); .Build(),
BiasAddOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
BiasAddOp<DeviceType::OPENCL, half>); .Build(),
BiasAddOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
namespace mace { namespace mace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
static void BiasAdd( static void BiasAdd(int iters, int batch, int channels, int height, int width) {
int iters, int batch, int channels, int height, int width) {
mace::testing::StopTiming(); mace::testing::StopTiming();
OpsTestNet net; OpsTestNet net;
...@@ -20,15 +19,16 @@ static void BiasAdd( ...@@ -20,15 +19,16 @@ static void BiasAdd(
net.AddRandomInput<D, T>("Bias", {channels}, true); net.AddRandomInput<D, T>("Bias", {channels}, true);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddBM") OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("InputImage") .Input("InputImage")
.Input("BiasImage") .Input("BiasImage")
.Output("Output") .Output("Output")
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} } else {
else {
OpDefBuilder("BiasAdd", "BiasAddBM") OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("Input") .Input("Input")
.Input("Bias") .Input("Bias")
...@@ -51,12 +51,12 @@ static void BiasAdd( ...@@ -51,12 +51,12 @@ static void BiasAdd(
#define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \ #define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \ mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \ BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
} \ } \
BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BIAS_ADD(N, C, H, W, TYPE) \ #define BM_BIAS_ADD(N, C, H, W, TYPE) \
......
...@@ -15,12 +15,14 @@ void BiasAddSimple() { ...@@ -15,12 +15,14 @@ void BiasAddSimple() {
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {1, 6, 2, 1}, net.AddInputFromArray<D, float>("Input", {1, 6, 2, 1},
{5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15});
net.AddInputFromArray<D, float>("Bias", {1}, {0.5f}); net.AddInputFromArray<D, float>("Bias", {1}, {0.5f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -31,7 +33,8 @@ void BiasAddSimple() { ...@@ -31,7 +33,8 @@ void BiasAddSimple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("Input") .Input("Input")
...@@ -43,16 +46,14 @@ void BiasAddSimple() { ...@@ -43,16 +46,14 @@ void BiasAddSimple() {
} }
// Check // Check
auto expected = auto expected = CreateTensor<float>(
CreateTensor<float>({1, 6, 2, 1}, {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, {1, 6, 2, 1},
11.5, 13.5, 13.5, 15.5, 15.5}); {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2);
} }
TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple<DeviceType::CPU>(); }
BiasAddSimple<DeviceType::CPU>();
}
TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) {
BiasAddSimple<DeviceType::OPENCL>(); BiasAddSimple<DeviceType::OPENCL>();
...@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true); net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
// run cpu // run cpu
...@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels}); net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true); net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
// run cpu // run cpu
...@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage") .Input("InputImage")
...@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
} }
...@@ -6,14 +6,18 @@ ...@@ -6,14 +6,18 @@
namespace mace { namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") void Register_BufferToImage(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.Build(), .Device(DeviceType::OPENCL)
BufferToImageOp<DeviceType::OPENCL, float>); .TypeConstraint<float>("T")
.Build(),
BufferToImageOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
BufferToImageOp<DeviceType::OPENCL, half>); .Build(),
BufferToImageOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -6,9 +6,12 @@ ...@@ -6,9 +6,12 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("ChannelShuffle") void Register_ChannelShuffle(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Build(), .Device(DeviceType::CPU)
ChannelShuffleOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
ChannelShuffleOp<DeviceType::CPU, float>);
}
} // namespace mace } // namespace mace
...@@ -23,7 +23,8 @@ static void ChannelShuffle( ...@@ -23,7 +23,8 @@ static void ChannelShuffle(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up // Warm-up
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
......
...@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) { ...@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
.AddIntArg("group", 4) .AddIntArg("group", 4)
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
"Input", {1, 8, 1, 2}, "Input", {1, 8, 1, 2},
......
...@@ -6,21 +6,28 @@ ...@@ -6,21 +6,28 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") void Register_Concat(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Build(), .Device(DeviceType::CPU)
ConcatOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") .Build(),
.TypeConstraint<half>("T") ConcatOp<DeviceType::CPU, float>);
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
ConcatOp<DeviceType::CPU, half>); .Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::CPU, half>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
}
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
} // namespace mace } // namespace mace
...@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters, ...@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters,
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0); net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1); net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0", kernels::BufferType::IN_OUT); BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0",
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1", kernels::BufferType::IN_OUT); kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT);
OpDefBuilder("Concat", "ConcatBM") OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0") .Input("InputImage0")
.Input("InputImage1") .Input("InputImage1")
...@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters, ...@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters,
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
} }
const int64_t tot = static_cast<int64_t>(iters) * const int64_t tot =
static_cast<int64_t>(iters) *
(net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size()); (net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size());
mace::testing::ItemsProcessed(tot); mace::testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(T)); testing::BytesProcessed(tot * sizeof(T));
......
...@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) { ...@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) {
for (int i = 0; i < num_inputs; ++i) { for (int i = 0; i < num_inputs; ++i) {
builder = builder.Input(("Input" + ToString(i)).c_str()); builder = builder.Input(("Input" + ToString(i)).c_str());
} }
builder.AddIntArg("axis", axis).Output("Output").Finalize(net.NewOperatorDef()); builder.AddIntArg("axis", axis)
.Output("Output")
.Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data; std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim); GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim);
...@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) { ...@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
concat_axis_size += input_shapes[i][axis]; concat_axis_size += input_shapes[i][axis];
GenerateRandomRealTypeData(input_shapes[i], inputs[i]); GenerateRandomRealTypeData(input_shapes[i], inputs[i]);
input_ptrs[i] = inputs[i].data(); input_ptrs[i] = inputs[i].data();
net.AddInputFromArray<DeviceType::CPU, float>(("Input" + ToString(i)).c_str(), net.AddInputFromArray<DeviceType::CPU, float>(
input_shapes[i], inputs[i]); ("Input" + ToString(i)).c_str(), input_shapes[i], inputs[i]);
} }
// Run // Run
...@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) { ...@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) {
} }
} }
template<typename T> template <typename T>
void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const int axis) { const int axis) {
srand(time(nullptr)); srand(time(nullptr));
...@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const std::string input_name = ("Input" + ToString(i)).c_str(); const std::string input_name = ("Input" + ToString(i)).c_str();
const std::string image_name = ("InputImage" + ToString(i)).c_str(); const std::string image_name = ("InputImage" + ToString(i)).c_str();
concat_axis_size += shapes[i][axis]; concat_axis_size += shapes[i][axis];
net.AddRandomInput<DeviceType::OPENCL, float>(input_name, net.AddRandomInput<DeviceType::OPENCL, float>(input_name, shapes[i]);
shapes[i]); BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name,
BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name, kernels::BufferType::IN_OUT); kernels::BufferType::IN_OUT);
} }
auto builder = OpDefBuilder("Concat", "ConcatTest"); auto builder = OpDefBuilder("Concat", "ConcatTest");
...@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
// Run // Run
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check // Check
auto output = net.GetOutput("Output"); auto output = net.GetOutput("Output");
...@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
while (output_ptr != (output->data<float>() + output->size())) { while (output_ptr != (output->data<float>() + output->size())) {
for (int i = 0; i < num_inputs; ++i) { for (int i = 0; i < num_inputs; ++i) {
index_t num_elements = index_t num_elements =
std::accumulate(shapes[i].begin() + axis, shapes[i].end(), std::accumulate(shapes[i].begin() + axis, shapes[i].end(), 1,
1, std::multiplies<index_t>()); std::multiplies<index_t>());
const std::string input_name = ("Input" + ToString(i)).c_str(); const std::string input_name = ("Input" + ToString(i)).c_str();
const Tensor *input_tensor = net.GetTensor(input_name.data()); const Tensor *input_tensor = net.GetTensor(input_name.data());
Tensor::MappingGuard input_guard(input_tensor); Tensor::MappingGuard input_guard(input_tensor);
const float *input_ptr = input_tensor->data<float>() + k * num_elements; const float *input_ptr = input_tensor->data<float>() + k * num_elements;
for (int j = 0; j < num_elements; ++j) { for (int j = 0; j < num_elements; ++j) {
EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) << "With index: " << i << ", " << j; EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2)
<< "With index: " << i << ", " << j;
} }
} }
k++; k++;
...@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
} }
TEST_F(ConcatOpTest, OPENCLAligned) { TEST_F(ConcatOpTest, OPENCLAligned) {
OpenclRandomTest<float>({ OpenclRandomTest<float>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
} }
TEST_F(ConcatOpTest, OPENCLHalfAligned) { TEST_F(ConcatOpTest, OPENCLHalfAligned) {
OpenclRandomTest<half>({ OpenclRandomTest<half>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
} }
TEST_F(ConcatOpTest, OPENCLUnAligned) { TEST_F(ConcatOpTest, OPENCLUnAligned) {
OpenclRandomTest<float>({ OpenclRandomTest<float>({{3, 32, 32, 13}, {3, 32, 32, 17}}, 3);
{3, 32, 32, 13},
{3, 32, 32, 17}
},
3);
} }
...@@ -6,31 +6,38 @@ ...@@ -6,31 +6,38 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") void Register_Conv2D(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Build(), .Device(DeviceType::CPU)
Conv2dOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") Conv2dOp<DeviceType::CPU, float>);
.TypeConstraint<half>("T")
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
Conv2dOp<DeviceType::CPU, half>); .Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::CPU, half>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
Conv2dOp<DeviceType::NEON, float>); .Build(),
Conv2dOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
Conv2dOp<DeviceType::OPENCL, float>); .Build(),
Conv2dOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
.TypeConstraint<half>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Build(), .Device(DeviceType::OPENCL)
Conv2dOp<DeviceType::OPENCL, half>); .TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -33,9 +33,12 @@ static void Conv2d(int iters, ...@@ -33,9 +33,12 @@ static void Conv2d(int iters,
net.AddRandomInput<D, float>("Bias", {output_channels}); net.AddRandomInput<D, float>("Bias", {output_channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -89,7 +92,7 @@ static void Conv2d(int iters, ...@@ -89,7 +92,7 @@ static void Conv2d(int iters,
BENCHMARK( \ BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ #define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
// ICNet // ICNet
...@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half); ...@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half); BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half);
// Test RGB <-> YUV // Test RGB <-> YUV
//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); // BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float); // BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
// //
//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); // BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments // BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad
//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float); // alignments
//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float); // BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float); // BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float); // BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float); // BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); // BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); // BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float); // BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); // BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float); // BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float); // BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float); // BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float); // BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float); // BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float); // BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float); // BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); // BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); // BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); // BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
} // namespace mace } // namespace mace
...@@ -10,7 +10,7 @@ using namespace mace; ...@@ -10,7 +10,7 @@ using namespace mace;
class Conv2dOpTest : public OpsTestBase {}; class Conv2dOpTest : public OpsTestBase {};
template<DeviceType D> template <DeviceType D>
void TestSimple3x3VALID() { void TestSimple3x3VALID() {
OpsTestNet net; OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -42,10 +42,9 @@ void TestSimple3x3VALID() { ...@@ -42,10 +42,9 @@ void TestSimple3x3VALID() {
auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f}); auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
template<DeviceType D> template <DeviceType D>
void TestSimple3x3SAME() { void TestSimple3x3SAME() {
OpsTestNet net; OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) { ...@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) {
} }
#endif #endif
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3VALID() { void TestNHWCSimple3x3VALID() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
...@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() { ...@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() { ...@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() { ...@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01); ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3SAME() { void TestNHWCSimple3x3SAME() {
OpsTestNet net; OpsTestNet net;
...@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() { ...@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() { ...@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) { ...@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>(); TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
} }
template<DeviceType D> template <DeviceType D>
void TestSimple3x3WithoutBias() { void TestSimple3x3WithoutBias() {
OpsTestNet net; OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() { ...@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
#ifdef __ARM_NEON #ifdef __ARM_NEON
TEST_F(Conv2dOpTest, NEONWithouBias) { TEST_F(Conv2dOpTest, NEONWithouBias) {
TestSimple3x3WithoutBias<DeviceType::NEON>(); TestSimple3x3WithoutBias<DeviceType::NEON>();
} }
#endif #endif
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3WithoutBias() { void TestNHWCSimple3x3WithoutBias() {
OpsTestNet net; OpsTestNet net;
...@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run // Run
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("Input") .Input("Input")
...@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) { ...@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>(); TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
} }
template<DeviceType D> template <DeviceType D>
static void TestCombined3x3() { static void TestCombined3x3() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -335,17 +344,13 @@ static void TestCombined3x3() { ...@@ -335,17 +344,13 @@ static void TestCombined3x3() {
4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f}); 4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
#ifdef __ARM_NEON #ifdef __ARM_NEON
TEST_F(Conv2dOpTest, NEONCombined) { TEST_F(Conv2dOpTest, NEONCombined) { TestCombined3x3<DeviceType::NEON>(); }
TestCombined3x3<DeviceType::NEON>();
}
#endif #endif
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void TestNHWCCombined3x3() { static void TestNHWCCombined3x3() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() { ...@@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() {
// Add input data // Add input data
net.AddInputFromArray<D, T>( net.AddInputFromArray<D, T>(
"Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<D, T>( net.AddInputFromArray<D, T>(
"Filter", {3, 3, 2, 2}, "Filter", {3, 3, 2, 2},
{1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, {1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
...@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() { ...@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() {
net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() { ...@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("Input") .Input("Input")
...@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() { ...@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
net.RunOp(D); net.RunOp(D);
} }
// Check // Check
auto expected = CreateTensor<float>( auto expected = CreateTensor<float>(
{1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
12.1f, 6.2f, 18.1f, 9.2f, 12.1f, 6.2f, 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01); ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
} }
TEST_F(Conv2dOpTest, CPUStride2) { TEST_F(Conv2dOpTest, CPUStride2) {
...@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) { ...@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) {
TestNHWCCombined3x3<DeviceType::OPENCL, float>(); TestNHWCCombined3x3<DeviceType::OPENCL, float>();
} }
template<DeviceType D> template <DeviceType D>
void TestConv1x1() { void TestConv1x1() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -435,9 +441,12 @@ void TestConv1x1() { ...@@ -435,9 +441,12 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
BufferToImage<D, float>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -451,7 +460,8 @@ void TestConv1x1() { ...@@ -451,7 +460,8 @@ void TestConv1x1() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
.Input("Input") .Input("Input")
...@@ -479,15 +489,11 @@ void TestConv1x1() { ...@@ -479,15 +489,11 @@ void TestConv1x1() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
TEST_F(Conv2dOpTest, CPUConv1x1) { TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TestConv1x1<DeviceType::CPU>();
}
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::OPENCL>(); }
TestConv1x1<DeviceType::OPENCL>();
}
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
...@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
std::vector<float> float_input_data; std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data);
std::vector<float> float_filter_data; std::vector<float> float_filter_data;
GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); GenerateRandomRealTypeData(
{kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
std::vector<float> float_bias_data; std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data); GenerateRandomRealTypeData({output_channels}, float_bias_data);
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); "Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data); net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);
// run on cpu // run on cpu
...@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, half>(net, "Input", "InputImage",
BufferToImage<D, half>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, half>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
}; };
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
namespace mace { namespace mace {
TEST(CoreTest, INIT_MODE) { TEST(CoreTest, INIT_MODE) {
std::vector<OperatorDef> op_defs; std::vector<OperatorDef> op_defs;
Workspace ws; Workspace ws;
...@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) { ...@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) {
.Output("B2IOutput") .Output("B2IOutput")
.AddIntArg("buffer_type", kernels::BufferType::FILTER) .AddIntArg("buffer_type", kernels::BufferType::FILTER)
.AddIntArg("mode", static_cast<int>(NetMode::INIT)) .AddIntArg("mode", static_cast<int>(NetMode::INIT))
.Finalize(&op_defs[op_defs.size()-1]); .Finalize(&op_defs[op_defs.size() - 1]);
Tensor *input = Tensor *input =
ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<float>::v()); ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL),
DataTypeToEnum<float>::v());
input->Resize({1, 3, 3, 3}); input->Resize({1, 3, 3, 3});
{ {
Tensor::MappingGuard input_mapper(input); Tensor::MappingGuard input_mapper(input);
...@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) { ...@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) {
.Input("B2IOutput") .Input("B2IOutput")
.Output("Output") .Output("Output")
.AddIntArg("buffer_type", kernels::BufferType::FILTER) .AddIntArg("buffer_type", kernels::BufferType::FILTER)
.Finalize(&op_defs[op_defs.size()-1]); .Finalize(&op_defs[op_defs.size() - 1]);
NetDef net_def; NetDef net_def;
for (auto &op_def : op_defs) { for (auto &op_def : op_defs) {
net_def.add_op()->CopyFrom(op_def); net_def.add_op()->CopyFrom(op_def);
} }
auto net = CreateNet(net_def, &ws, DeviceType::OPENCL, NetMode::INIT); std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());
auto net =
CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT);
net->Run(); net->Run();
EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr); EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr);
EXPECT_TRUE(ws.GetTensor("Output") == nullptr); EXPECT_TRUE(ws.GetTensor("Output") == nullptr);
net = CreateNet(net_def, &ws, DeviceType::OPENCL); net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL);
net->Run(); net->Run();
EXPECT_TRUE(ws.GetTensor("Output") != nullptr); EXPECT_TRUE(ws.GetTensor("Output") != nullptr);
ExpectTensorNear<float>(*ws.GetTensor("Input"), *ws.GetTensor("Output"), 1e-5); ExpectTensorNear<float>(*ws.GetTensor("Input"), *ws.GetTensor("Output"),
1e-5);
} }
} // namespace mace } // namespace mace
...@@ -6,21 +6,26 @@ ...@@ -6,21 +6,26 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d") void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Build(), .Device(DeviceType::CPU)
DepthwiseConv2dOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
DepthwiseConv2dOp<DeviceType::NEON, float>); .Build(),
DepthwiseConv2dOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
DepthwiseConv2dOp<DeviceType::OPENCL, float>); .Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, float>);
}
} // namespace mace } // namespace mace
...@@ -26,7 +26,7 @@ void SimpleValidTest() { ...@@ -26,7 +26,7 @@ void SimpleValidTest() {
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {1, 2, 2, 3}, net.AddInputFromArray<D, float>("Input", {1, 2, 2, 3},
{1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12});
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {2, 2, 2, 2}, "Filter", {2, 2, 2, 2},
{1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, 3.0f, 7.0f, 11.0f, {1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, 3.0f, 7.0f, 11.0f,
...@@ -41,12 +41,9 @@ void SimpleValidTest() { ...@@ -41,12 +41,9 @@ void SimpleValidTest() {
{196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f}); {196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { SimpleValidTest<DeviceType::CPU>(); }
SimpleValidTest<DeviceType::CPU>();
}
template <DeviceType D> template <DeviceType D>
void TestNxNS12(const index_t height, const index_t width) { void TestNxNS12(const index_t height, const index_t width) {
...@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, input_channels, height, width}); net.AddRandomInput<D, float>("Input",
net.AddRandomInput<D, float>("Filter", {multiplier, input_channels, kernel_h, kernel_w}); {batch, input_channels, height, width});
net.AddRandomInput<D, float>(
"Filter", {multiplier, input_channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {multiplier * input_channels}); net.AddRandomInput<D, float>("Bias", {multiplier * input_channels});
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
...@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) {
func(kernel_size, kernel_size, stride, stride, SAME); func(kernel_size, kernel_size, stride, stride, SAME);
} }
} }
} }
#if __ARM_NEON #if __ARM_NEON
......
...@@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters, ...@@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters,
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width}); net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
net.AddRandomInput<D, float>("Filter", net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, kernel_w}); {output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels*channels}); net.AddRandomInput<D, float>("Bias", {output_channels * channels});
// Warm-up // Warm-up
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
...@@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters, ...@@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters,
net.Sync(); net.Sync();
} }
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ #define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
DEVICE) \ DEVICE) \
static void \ static void \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \ mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \ DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC); \ mace::Padding::P, OC); \
} \ } \
BENCHMARK( \ BENCHMARK( \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ #define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_DEPTHWISE_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 2, float); BM_DEPTHWISE_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 2, float);
......
...@@ -6,25 +6,30 @@ ...@@ -6,25 +6,30 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") void Register_FusedConv2D(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Build(), .Device(DeviceType::CPU)
FusedConv2dOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") FusedConv2dOp<DeviceType::CPU, float>);
.TypeConstraint<half>("T")
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
FusedConv2dOp<DeviceType::CPU, half>); .Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") FusedConv2dOp<DeviceType::CPU, half>);
.TypeConstraint<float>("T")
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
FusedConv2dOp<DeviceType::OPENCL, float>); .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") .Build(),
.TypeConstraint<half>("T") FusedConv2dOp<DeviceType::OPENCL, float>);
.Build(),
FusedConv2dOp<DeviceType::OPENCL, half>); REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -9,7 +9,7 @@ using namespace mace; ...@@ -9,7 +9,7 @@ using namespace mace;
class FusedConv2dOpTest : public OpsTestBase {}; class FusedConv2dOpTest : public OpsTestBase {};
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3VALID() { void TestNHWCSimple3x3VALID() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
...@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() { ...@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() { ...@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() { ...@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01); ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3SAME() { void TestNHWCSimple3x3SAME() {
OpsTestNet net; OpsTestNet net;
...@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() { ...@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() { ...@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() { ...@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() {
} }
auto expected = CreateTensor<float>( auto expected = CreateTensor<float>(
{1, 3, 3, 1}, {1, 3, 3, 1}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01); ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
} }
...@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) { ...@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>(); TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
void TestNHWCSimple3x3WithoutBias() { void TestNHWCSimple3x3WithoutBias() {
OpsTestNet net; OpsTestNet net;
...@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run // Run
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input") .Input("Input")
...@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) { ...@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>(); TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
} }
template<DeviceType D> template <DeviceType D>
void TestConv1x1() { void TestConv1x1() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -207,9 +217,12 @@ void TestConv1x1() { ...@@ -207,9 +217,12 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
BufferToImage<D, float>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, float>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -223,7 +236,8 @@ void TestConv1x1() { ...@@ -223,7 +236,8 @@ void TestConv1x1() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input") .Input("Input")
...@@ -251,15 +265,11 @@ void TestConv1x1() { ...@@ -251,15 +265,11 @@ void TestConv1x1() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
TEST_F(FusedConv2dOpTest, CPUConv1x1) { TEST_F(FusedConv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TestConv1x1<DeviceType::CPU>();
}
TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::OPENCL>(); }
TestConv1x1<DeviceType::OPENCL>();
}
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
...@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) { ...@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::OPENCL, float>({107, 113, 5, 7}); TestComplexConvNxNS12<DeviceType::OPENCL, float>({107, 113, 5, 7});
} }
template<DeviceType D> template <DeviceType D>
static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
...@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
std::vector<float> float_input_data; std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data);
std::vector<float> float_filter_data; std::vector<float> float_filter_data;
GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); GenerateRandomRealTypeData(
{kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
std::vector<float> float_bias_data; std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data); GenerateRandomRealTypeData({output_channels}, float_bias_data);
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); "Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data); net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);
// run on cpu // run on cpu
...@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, half>(net, "Input", "InputImage",
BufferToImage<D, half>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, half>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.2);
}; };
...@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { ...@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}); TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64});
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
const std::vector<index_t> &filter_shape) { const std::vector<index_t> &filter_shape) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
...@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, ...@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER); kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, ...@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, ...@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
} }
TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) { TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32}, TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32}, {7, 7, 3, 64});
{7, 7, 3, 64});
} }
TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) { TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40}, TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40}, {15, 1, 32, 64});
{15, 1, 32, 64});
} }
template<DeviceType D, typename T> template<DeviceType D, typename T>
......
...@@ -6,16 +6,20 @@ ...@@ -6,16 +6,20 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling") void Register_GlobalAvgPooling(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling")
.Build(), .Device(DeviceType::CPU)
GlobalAvgPoolingOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
GlobalAvgPoolingOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
GlobalAvgPoolingOp<DeviceType::NEON, float>); .Build(),
GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
}
} // namespace mace } // namespace mace
...@@ -22,7 +22,8 @@ static void GlobalAvgPooling( ...@@ -22,7 +22,8 @@ static void GlobalAvgPooling(
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up // Warm-up
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
......
...@@ -6,14 +6,18 @@ ...@@ -6,14 +6,18 @@
namespace mace { namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer") void Register_ImageToBuffer(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
.Build(), .Device(DeviceType::OPENCL)
ImageToBufferOp<DeviceType::OPENCL, float>); .TypeConstraint<float>("T")
.Build(),
ImageToBufferOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
ImageToBufferOp<DeviceType::OPENCL, half>); .Build(),
ImageToBufferOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h" #include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
...@@ -56,7 +56,8 @@ class OpDefBuilder { ...@@ -56,7 +56,8 @@ class OpDefBuilder {
return *this; return *this;
} }
OpDefBuilder AddIntsArg(const std::string &name, const std::vector<int> &values) { OpDefBuilder AddIntsArg(const std::string &name,
const std::vector<int> &values) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { for (auto value : values) {
...@@ -65,7 +66,8 @@ class OpDefBuilder { ...@@ -65,7 +66,8 @@ class OpDefBuilder {
return *this; return *this;
} }
OpDefBuilder AddFloatsArg(const std::string &name, const std::vector<float> &values) { OpDefBuilder AddFloatsArg(const std::string &name,
const std::vector<float> &values) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { for (auto value : values) {
...@@ -75,7 +77,7 @@ class OpDefBuilder { ...@@ -75,7 +77,7 @@ class OpDefBuilder {
} }
OpDefBuilder AddStringsArg(const std::string &name, OpDefBuilder AddStringsArg(const std::string &name,
const std::vector<const char *> &values) { const std::vector<const char *> &values) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { for (auto value : values) {
...@@ -94,7 +96,7 @@ class OpDefBuilder { ...@@ -94,7 +96,7 @@ class OpDefBuilder {
class OpsTestNet { class OpsTestNet {
public: public:
OpsTestNet() {} OpsTestNet() : op_registry_(new OperatorRegistry()) {};
template <DeviceType D, typename T> template <DeviceType D, typename T>
void AddInputFromArray(const std::string &name, void AddInputFromArray(const std::string &name,
...@@ -135,10 +137,11 @@ class OpsTestNet { ...@@ -135,10 +137,11 @@ class OpsTestNet {
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1); std::normal_distribution<float> nd(0, 1);
if (DataTypeToEnum<T>::value == DT_HALF) { if (DataTypeToEnum<T>::value == DT_HALF) {
std::generate(input_data, input_data + input->size(), std::generate(
[&gen, &nd, positive] { input_data, input_data + input->size(), [&gen, &nd, positive] {
return half_float::half_cast<half>(positive ? std::abs(nd(gen)) : nd(gen)); return half_float::half_cast<half>(positive ? std::abs(nd(gen))
}); : nd(gen));
});
} else { } else {
std::generate(input_data, input_data + input->size(), std::generate(input_data, input_data + input->size(),
[&gen, &nd, positive] { [&gen, &nd, positive] {
...@@ -160,7 +163,7 @@ class OpsTestNet { ...@@ -160,7 +163,7 @@ class OpsTestNet {
for (auto &op_def_ : op_defs_) { for (auto &op_def_ : op_defs_) {
net_def.add_op()->CopyFrom(op_def_); net_def.add_op()->CopyFrom(op_def_);
} }
net_ = CreateNet(net_def, &ws_, device); net_ = CreateNet(op_registry_, net_def, &ws_, device);
device_ = device; device_ = device;
return net_->Run(); return net_->Run();
} }
...@@ -182,6 +185,7 @@ class OpsTestNet { ...@@ -182,6 +185,7 @@ class OpsTestNet {
} }
public: public:
std::shared_ptr<OperatorRegistry> op_registry_;
Workspace ws_; Workspace ws_;
std::vector<OperatorDef> op_defs_; std::vector<OperatorDef> op_defs_;
std::unique_ptr<NetBase> net_; std::unique_ptr<NetBase> net_;
...@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape, ...@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
res.resize(size); res.resize(size);
if (DataTypeToEnum<T>::value == DT_HALF) { if (DataTypeToEnum<T>::value == DT_HALF) {
std::generate(res.begin(), res.end(), [&gen, &nd] { return half_float::half_cast<half>(nd(gen)); }); std::generate(res.begin(), res.end(),
[&gen, &nd] { return half_float::half_cast<half>(nd(gen)); });
} else { } else {
std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); });
} }
...@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape, ...@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
template <typename T> template <typename T>
unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape, unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape,
const std::vector<T> &data) { const std::vector<T> &data) {
unique_ptr<Tensor> res(new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v())); unique_ptr<Tensor> res(
new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v()));
res->Resize(shape); res->Resize(shape);
T *input_data = res->mutable_data<T>(); T *input_data = res->mutable_data<T>();
memcpy(input_data, data.data(), data.size() * sizeof(T)); memcpy(input_data, data.data(), data.size() * sizeof(T));
...@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) { ...@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) {
template <typename T> template <typename T>
struct is_floating_point_type { struct is_floating_point_type {
static const bool value = static const bool value = std::is_same<T, float>::value ||
std::is_same<T, float>::value || std::is_same<T, double>::value std::is_same<T, double>::value ||
|| std::is_same<T, half>::value; std::is_same<T, half>::value;
}; };
template <typename T> template <typename T>
...@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) { ...@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) {
<< "y.shape [ " << ShapeToString(y) << "]"; << "y.shape [ " << ShapeToString(y) << "]";
} }
template <typename EXP_TYPE, typename RES_TYPE, bool is_fp = is_floating_point_type<EXP_TYPE>::value> template <typename EXP_TYPE,
typename RES_TYPE,
bool is_fp = is_floating_point_type<EXP_TYPE>::value>
struct Expector; struct Expector;
// Partial specialization for float and double. // Partial specialization for float and double.
...@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> { ...@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
} }
} }
} }
}; };
template <typename T> template <typename T>
...@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { ...@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
template <typename EXP_TYPE, typename RES_TYPE> template <typename EXP_TYPE, typename RES_TYPE>
void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
static_assert(is_floating_point_type<EXP_TYPE>::value static_assert(is_floating_point_type<EXP_TYPE>::value &&
&& is_floating_point_type<RES_TYPE>::value, is_floating_point_type<RES_TYPE>::value,
"T is not a floating point type"); "T is not a floating point type");
Expector<EXP_TYPE, RES_TYPE>::Near(x, y, abs_err); Expector<EXP_TYPE, RES_TYPE>::Near(x, y, abs_err);
} }
......
...@@ -6,29 +6,36 @@ ...@@ -6,29 +6,36 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") void Register_Pooling(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Build(), .Device(DeviceType::CPU)
PoolingOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") .Build(),
.TypeConstraint<half>("T") PoolingOp<DeviceType::CPU, float>);
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
PoolingOp<DeviceType::CPU, half>); .Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::CPU, half>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
PoolingOp<DeviceType::NEON, float>); .Build(),
PoolingOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
PoolingOp<DeviceType::OPENCL, float>); .Build(),
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") PoolingOp<DeviceType::OPENCL, float>);
.TypeConstraint<half>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Build(), .Device(DeviceType::OPENCL)
PoolingOp<DeviceType::OPENCL, half>); .TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -35,7 +35,8 @@ static void Pooling(int iters, ...@@ -35,7 +35,8 @@ static void Pooling(int iters,
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up // Warm-up
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
......
...@@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) { ...@@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) {
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
"Input", {1, 4, 4, 2}, "Input", {1, 4, 4, 2},
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
// Run // Run
...@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) { ...@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
TEST_F(PoolingOpTest, MAX_SAME) { TEST_F(PoolingOpTest, MAX_SAME) {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { ...@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
template<DeviceType D> template <DeviceType D>
static void SimpleMaxPooling3S2() { static void SimpleMaxPooling3S2() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() { ...@@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() {
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Input", {1, 3, 9, 1}, "Input", {1, 3, 9, 1},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() { ...@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() {
.AddIntsArg("dilations", {1, 1}) .AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
// Run // Run
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
...@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() { ...@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { SimpleMaxPooling3S2<CPU>(); }
SimpleMaxPooling3S2<CPU>();
}
TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) { TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) {
SimpleMaxPooling3S2<OPENCL>(); SimpleMaxPooling3S2<OPENCL>();
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void MaxPooling3S2(const std::vector<index_t> &input_shape, static void MaxPooling3S2(const std::vector<index_t> &input_shape,
const std::vector<int> strides, const std::vector<int> strides,
Padding padding) { Padding padding) {
...@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape, ...@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<T>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<T>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
} }
// TODO(chenghui) : there is a bug. // TODO(chenghui) : there is a bug.
//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) { // TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
// AlignedMaxPooling3S2<NEON>(Padding::VALID); // AlignedMaxPooling3S2<NEON>(Padding::VALID);
// AlignedMaxPooling3S2<NEON>(Padding::SAME); // AlignedMaxPooling3S2<NEON>(Padding::SAME);
//} //}
...@@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { ...@@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
"Input", {1, 4, 4, 2}, "Input", {1, 4, 4, 2},
{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
// Run // Run
...@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { ...@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
} }
template<DeviceType D> template <DeviceType D>
static void SimpleAvgPoolingTest() { static void SimpleAvgPoolingTest() {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
...@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() { ...@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() {
"Input", {1, 2, 8, 1}, "Input", {1, 2, 8, 1},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() { ...@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check // Check
auto expected = CreateTensor<float>({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5}); auto expected = CreateTensor<float>({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5});
...@@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) { ...@@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) {
SimpleAvgPoolingTest<OPENCL>(); SimpleAvgPoolingTest<OPENCL>();
} }
template<DeviceType D, typename T> template <DeviceType D, typename T>
static void AvgPoolingTest(const std::vector<index_t> &shape, static void AvgPoolingTest(const std::vector<index_t> &shape,
const std::vector<int> &kernels, const std::vector<int> &kernels,
const std::vector<int> &strides, const std::vector<int> &strides,
Padding padding) { Padding padding) {
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
...@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape, ...@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float, T>(expected, *net.GetOutput("OPENCLOutput"), 0.01); ExpectTensorNear<float, T>(expected, *net.GetOutput("OPENCLOutput"), 0.01);
} }
TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) { TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4},
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4},
Padding::SAME);
} }
TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) { TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) {
AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4},
Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
} }
TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) { TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::SAME);
} }
TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) { TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16},
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::SAME);
} }
TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) { TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::VALID); AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2},
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::SAME); Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2},
Padding::SAME);
} }
TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) { TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::VALID); AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8},
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::SAME); Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8},
Padding::SAME);
} }
...@@ -6,26 +6,32 @@ ...@@ -6,26 +6,32 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu") void Register_Relu(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.Build(), .Device(DeviceType::CPU)
ReluOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
ReluOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
ReluOp<DeviceType::NEON, float>); .Build(),
ReluOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
ReluOp<DeviceType::OPENCL, float>); .Build(),
ReluOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
ReluOp<DeviceType::OPENCL, half>); .Build(),
ReluOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -19,7 +19,8 @@ static void ReluBenchmark( ...@@ -19,7 +19,8 @@ static void ReluBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluBM") OpDefBuilder("Relu", "ReluBM")
.Input("InputImage") .Input("InputImage")
...@@ -54,9 +55,9 @@ static void ReluBenchmark( ...@@ -54,9 +55,9 @@ static void ReluBenchmark(
} \ } \
BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE) BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \ #define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \ BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\ BM_RELU_MACRO(N, C, H, W, TYPE, NEON); \
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL); BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU(1, 1, 512, 512, float); BM_RELU(1, 1, 512, 512, float);
......
...@@ -14,13 +14,13 @@ void TestSimple() { ...@@ -14,13 +14,13 @@ void TestSimple() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", net.AddInputFromArray<D, float>(
{2, 2, 2, 2}, "Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
-3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -31,7 +31,8 @@ void TestSimple() { ...@@ -31,7 +31,8 @@ void TestSimple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("Input") .Input("Input")
...@@ -42,38 +43,30 @@ void TestSimple() { ...@@ -42,38 +43,30 @@ void TestSimple() {
net.RunOp(D); net.RunOp(D);
} }
auto expected = CreateTensor<float>({2, 2, 2, 2}, auto expected = CreateTensor<float>(
{0, 7, 0, 6, 0, 5, 0, 4, {2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
0, 3, 0, 2, 0, 1, 0, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
TEST_F(ReluOpTest, CPUSimple) { TEST_F(ReluOpTest, CPUSimple) { TestSimple<DeviceType::CPU>(); }
TestSimple<DeviceType::CPU>();
}
#if __ARM_NEON #if __ARM_NEON
TEST_F(ReluOpTest, NEONSimple) { TEST_F(ReluOpTest, NEONSimple) { TestSimple<DeviceType::NEON>(); }
TestSimple<DeviceType::NEON>();
}
#endif #endif
TEST_F(ReluOpTest, OPENCLSimple) { TEST_F(ReluOpTest, OPENCLSimple) { TestSimple<DeviceType::OPENCL>(); }
TestSimple<DeviceType::OPENCL>();
}
template <DeviceType D> template <DeviceType D>
void TestUnalignedSimple() { void TestUnalignedSimple() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", net.AddInputFromArray<D, float>("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5});
{1, 3, 2, 1},
{-7, 7, -6, 6, -5, 5});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -84,7 +77,8 @@ void TestUnalignedSimple() { ...@@ -84,7 +77,8 @@ void TestUnalignedSimple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("Input") .Input("Input")
...@@ -95,8 +89,7 @@ void TestUnalignedSimple() { ...@@ -95,8 +89,7 @@ void TestUnalignedSimple() {
net.RunOp(D); net.RunOp(D);
} }
auto expected = CreateTensor<float>({1, 3, 2, 1}, auto expected = CreateTensor<float>({1, 3, 2, 1}, {0, 7, 0, 6, 0, 5});
{0, 7, 0, 6, 0, 5});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
...@@ -120,13 +113,13 @@ void TestSimpleReluX() { ...@@ -120,13 +113,13 @@ void TestSimpleReluX() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", net.AddInputFromArray<D, float>(
{2, 2, 2, 2}, "Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
-3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -138,7 +131,8 @@ void TestSimpleReluX() { ...@@ -138,7 +131,8 @@ void TestSimpleReluX() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("Input") .Input("Input")
...@@ -150,38 +144,31 @@ void TestSimpleReluX() { ...@@ -150,38 +144,31 @@ void TestSimpleReluX() {
net.RunOp(D); net.RunOp(D);
} }
auto expected = CreateTensor<float>({2, 2, 2, 2}, auto expected = CreateTensor<float>(
{0, 6, 0, 6, 0, 5, 0, 4, {2, 2, 2, 2}, {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
0, 3, 0, 2, 0, 1, 0, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
TEST_F(ReluOpTest, CPUSimpleReluX) { TEST_F(ReluOpTest, CPUSimpleReluX) { TestSimpleReluX<DeviceType::CPU>(); }
TestSimpleReluX<DeviceType::CPU>();
}
#if __ARM_NEON #if __ARM_NEON
TEST_F(ReluOpTest, NEONSimpleReluX) { TEST_F(ReluOpTest, NEONSimpleReluX) { TestSimpleReluX<DeviceType::NEON>(); }
TestSimpleReluX<DeviceType::NEON>();
}
#endif #endif
TEST_F(ReluOpTest, OPENCLSimpleReluX) { TEST_F(ReluOpTest, OPENCLSimpleReluX) { TestSimpleReluX<DeviceType::OPENCL>(); }
TestSimpleReluX<DeviceType::OPENCL>();
}
template <DeviceType D> template <DeviceType D>
void TestUnalignedSimpleReluX() { void TestUnalignedSimpleReluX() {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", net.AddInputFromArray<D, float>("Input", {1, 1, 7, 1},
{1, 1, 7, 1},
{-7, 7, -6, 6, -5, 5, -4}); {-7, 7, -6, 6, -5, 5, -4});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("InputImage") .Input("InputImage")
...@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() { ...@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else { } else {
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("Input") .Input("Input")
...@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() { ...@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() {
net.RunOp(D); net.RunOp(D);
} }
auto expected = CreateTensor<float>({1, 1, 7, 1}, auto expected = CreateTensor<float>({1, 1, 7, 1}, {0, 6, 0, 6, 0, 5, 0});
{0, 6, 0, 6, 0, 5, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
......
...@@ -6,26 +6,32 @@ ...@@ -6,26 +6,32 @@
namespace mace { namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear") void Register_ResizeBilinear(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Build(), .Device(DeviceType::CPU)
ResizeBilinearOp<DeviceType::CPU, float>); .TypeConstraint<float>("T")
.Build(),
ResizeBilinearOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T") .Device(DeviceType::NEON)
.Build(), .TypeConstraint<float>("T")
ResizeBilinearOp<DeviceType::NEON, float>); .Build(),
ResizeBilinearOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<float>("T")
ResizeBilinearOp<DeviceType::OPENCL, float>); .Build(),
ResizeBilinearOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.TypeConstraint<half>("T") .Device(DeviceType::OPENCL)
.Build(), .TypeConstraint<half>("T")
ResizeBilinearOp<DeviceType::OPENCL, half>); .Build(),
ResizeBilinearOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters, ...@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters,
net.AddInputFromArray<D, index_t>("OutSize", {2}, net.AddInputFromArray<D, index_t>("OutSize", {2},
{output_height, output_width}); {output_height, output_width});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
.Input("InputImage") .Input("InputImage")
.Input("OutSize") .Input("OutSize")
.Output("OutputImage") .Output("OutputImage")
.AddIntsArg("size", {output_height, output_width}) .AddIntsArg("size", {output_height, output_width})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else { } else {
OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
.Input("Input") .Input("Input")
.Input("OutSize") .Input("OutSize")
.Output("Output") .Output("Output")
.AddIntsArg("size", {output_height, output_width}) .AddIntsArg("size", {output_height, output_width})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} }
// Warm-up // Warm-up
...@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters, ...@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters,
BENCHMARK( \ BENCHMARK( \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE) BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE)
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \ #define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \ BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL); BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
// SNPE 835 GPU: 6870us // SNPE 835 GPU: 6870us
......
...@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() { ...@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() {
{batch, in_height, in_width, channels}); {batch, in_height, in_width, channels});
OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.AddIntArg("align_corners", align_corners) .AddIntArg("align_corners", align_corners)
.AddIntsArg("size", {height, width}) .AddIntsArg("size", {height, width})
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run on CPU // Run on CPU
net.RunOp(DeviceType::CPU); net.RunOp(DeviceType::CPU);
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
.AddIntArg("align_corners", align_corners) .AddIntArg("align_corners", align_corners)
.AddIntsArg("size", {height, width}) .AddIntsArg("size", {height, width})
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput",
kernels::BufferType::IN_OUT);
} else { } else {
// TODO support NEON // TODO support NEON
} }
......
...@@ -6,13 +6,17 @@ ...@@ -6,13 +6,17 @@
namespace mace { namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Build(), .Device(DeviceType::OPENCL)
SpaceToBatchNDOp<DeviceType::OPENCL, float>); .TypeConstraint<float>("T")
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") .Build(),
.TypeConstraint<half>("T") SpaceToBatchNDOp<DeviceType::OPENCL, float>);
.Build(), REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
SpaceToBatchNDOp<DeviceType::OPENCL, half>); .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, half>);
}
} // namespace mace } // namespace mace
...@@ -15,7 +15,8 @@ static void BMSpaceToBatch( ...@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -36,17 +37,19 @@ static void BMSpaceToBatch( ...@@ -36,17 +37,19 @@ static void BMSpaceToBatch(
net.Sync(); net.Sync();
} }
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \ #define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \ static void \
int iters) { \ BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ int iters) { \
mace::testing::ItemsProcessed(tot); \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::ItemsProcessed(tot); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \ BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) } \
BENCHMARK( \
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \ BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL); BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
BM_SPACE_TO_BATCH(128, 16, 16, 128, 2, float); BM_SPACE_TO_BATCH(128, 16, 16, 128, 2, float);
......
...@@ -2,23 +2,23 @@ ...@@ -2,23 +2,23 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include <fstream>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
#include <fstream>
using namespace mace; using namespace mace;
template<DeviceType D> template <DeviceType D>
void RunSpaceToBatch(const std::vector<index_t> &input_shape, void RunSpaceToBatch(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data, const std::vector<float> &input_data,
const std::vector<int> &block_shape_data, const std::vector<int> &block_shape_data,
const std::vector<int> &padding_data, const std::vector<int> &padding_data,
const Tensor *expected) { const Tensor *expected) {
OpsTestNet net; OpsTestNet net;
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>("Input", input_shape, input_data);
"Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape, ...@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check // Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
} }
template<DeviceType D> template <DeviceType D>
void RunBatchToSpace(const std::vector<index_t> &input_shape, void RunBatchToSpace(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data, const std::vector<float> &input_data,
const std::vector<int> &block_shape_data, const std::vector<int> &block_shape_data,
...@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape, ...@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
const Tensor *expected) { const Tensor *expected) {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>("Input", input_shape, input_data);
"Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT); BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage") .Input("InputImage")
.Output("OutputImage") .Output("OutputImage")
...@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape, ...@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check // Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
} }
template<typename T> template <typename T>
void TestBidirectionalTransform(const std::vector<index_t> &space_shape, void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
const std::vector<float> &space_data, const std::vector<float> &space_data,
const std::vector<int> &block_data, const std::vector<int> &block_data,
const std::vector<int> &padding_data, const std::vector<int> &padding_data,
const std::vector<index_t> &batch_shape, const std::vector<index_t> &batch_shape,
const std::vector<float> &batch_data) { const std::vector<float> &batch_data) {
auto space_tensor = unique_ptr<Tensor>(new Tensor(
auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<T>::v()));
DataTypeToEnum<T>::v()));
space_tensor->Resize(space_shape); space_tensor->Resize(space_shape);
{ {
Tensor::MappingGuard space_mapper(space_tensor.get()); Tensor::MappingGuard space_mapper(space_tensor.get());
T *space_ptr = space_tensor->mutable_data<T>(); T *space_ptr = space_tensor->mutable_data<T>();
MACE_CHECK(static_cast<size_t>(space_tensor->size()) == space_data.size()) MACE_CHECK(static_cast<size_t>(space_tensor->size()) == space_data.size())
<< "Space tensor size:" << space_tensor->size() << "Space tensor size:" << space_tensor->size()
<< ", space data size:" << space_data.size(); << ", space data size:" << space_data.size();
memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T)); memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T));
} }
auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), auto batch_tensor = unique_ptr<Tensor>(new Tensor(
DataTypeToEnum<T>::v())); GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<T>::v()));
batch_tensor->Resize(batch_shape); batch_tensor->Resize(batch_shape);
{ {
Tensor::MappingGuard batch_mapper(batch_tensor.get()); Tensor::MappingGuard batch_mapper(batch_tensor.get());
...@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape, ...@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(T)); memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(T));
} }
RunSpaceToBatch<DeviceType::OPENCL>(space_shape, space_data, RunSpaceToBatch<DeviceType::OPENCL>(space_shape, space_data, block_data,
block_data, padding_data, batch_tensor.get());
padding_data,
batch_tensor.get());
RunBatchToSpace<DeviceType::OPENCL>(batch_shape, batch_data, RunBatchToSpace<DeviceType::OPENCL>(batch_shape, batch_data, block_data,
block_data, padding_data, space_tensor.get());
padding_data,
space_tensor.get());
} }
TEST(SpaceToBatchTest, SmallData) { TEST(SpaceToBatchTest, SmallData) {
TestBidirectionalTransform<float>({1, 2, 2, 1}, TestBidirectionalTransform<float>({1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2},
{1, 2, 3, 4}, {0, 0, 0, 0}, {4, 1, 1, 1}, {1, 2, 3, 4});
{2, 2},
{0, 0, 0, 0},
{4, 1, 1, 1},
{1, 2, 3, 4}
);
} }
TEST(SpaceToBatchTest, SmallDataWithOnePadding) { TEST(SpaceToBatchTest, SmallDataWithOnePadding) {
TestBidirectionalTransform<float>({1, 2, 2, 1}, TestBidirectionalTransform<float>({1, 2, 2, 1}, {1, 2, 3, 4}, {3, 3},
{1, 2, 3, 4}, {1, 0, 1, 0}, {9, 1, 1, 1},
{3, 3}, {0, 0, 0, 0, 1, 2, 0, 3, 4});
{1, 0, 1, 0},
{9, 1, 1, 1},
{0, 0, 0, 0, 1, 2, 0, 3, 4}
);
} }
TEST(SpaceToBatchTest, SmallDataWithTwoPadding) { TEST(SpaceToBatchTest, SmallDataWithTwoPadding) {
TestBidirectionalTransform<float>({1, 2, 2, 1}, TestBidirectionalTransform<float>(
{1, 2, 3, 4}, {1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2}, {1, 1, 1, 1}, {4, 2, 2, 1},
{2, 2}, {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0});
{1, 1, 1, 1},
{4, 2, 2, 1},
{0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0}
);
} }
TEST(SpaceToBatchTest, SmallDataWithLargeImage) { TEST(SpaceToBatchTest, SmallDataWithLargeImage) {
TestBidirectionalTransform<float>({1, 2, 10, 1}, TestBidirectionalTransform<float>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, {1, 2, 10, 1},
11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
{2, 2}, {2, 2}, {0, 0, 0, 0}, {4, 1, 5, 1},
{0, 0, 0, 0}, {1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 11, 13, 15, 17, 19, 12, 14, 16, 18, 20});
{4, 1, 5, 1},
{1, 3, 5, 7, 9,
2, 4, 6, 8, 10,
11, 13, 15, 17, 19,
12, 14, 16, 18, 20}
);
} }
TEST(SpaceToBatchTest, MultiChannelData) { TEST(SpaceToBatchTest, MultiChannelData) {
TestBidirectionalTransform<float>({1, 2, 2, 3}, TestBidirectionalTransform<float>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 2},
{2, 2}, {0, 0, 0, 0}, {4, 1, 1, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
{0, 0, 0, 0},
{4, 1, 1, 3},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
);
} }
TEST(SpaceToBatchTest, LargerMultiChannelData) { TEST(SpaceToBatchTest, LargerMultiChannelData) {
TestBidirectionalTransform<float>({1, 4, 4, 1}, TestBidirectionalTransform<float>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {1, 4, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2}, {2, 2}, {0, 0, 0, 0}, {4, 2, 2, 1},
{0, 0, 0, 0}, {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16});
{4, 2, 2, 1},
{1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16}
);
} }
TEST(SpaceToBatchTest, MultiBatchData) { TEST(SpaceToBatchTest, MultiBatchData) {
TestBidirectionalTransform<float>({2, 2, 4, 1}, TestBidirectionalTransform<float>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {2, 2, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2}, {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 1},
{0, 0, 0, 0}, {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16});
{8, 1, 2, 1},
{1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16}
);
} }
TEST(SpaceToBatchTest, MultiBatchAndChannelData) { TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
TestBidirectionalTransform<float>({2, 2, 4, 2}, TestBidirectionalTransform<float>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, {2, 2, 4, 2},
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
{2, 2}, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
{0, 0, 0, 0}, {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 2},
{8, 1, 2, 2}, {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16,
{1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16, 17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32});
17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32}
);
} }
//TEST(SpaceTobatchTest, CompareTF) { // TEST(SpaceTobatchTest, CompareTF) {
// //
// const std::string space_file = "/data/local/tmp/test/input"; // const std::string space_file = "/data/local/tmp/test/input";
// const std::string batch_file = "/data/local/tmp/test/output"; // const std::string batch_file = "/data/local/tmp/test/output";
// const std::vector<index_t> space_shape = {1, 256, 256, 32}; // const std::vector<index_t> space_shape = {1, 256, 256, 32};
// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies<int>()); // const int space_size = std::accumulate(space_shape.begin(),
// space_shape.end(), 1, std::multiplies<int>());
// const std::vector<index_t> batch_shape = {4, 130, 130, 32}; // const std::vector<index_t> batch_shape = {4, 130, 130, 32};
// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies<int>()); // const int batch_size = std::accumulate(batch_shape.begin(),
// batch_shape.end(), 1, std::multiplies<int>());
// //
// auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), // auto space_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v())); // DataTypeToEnum<float>::v()));
// space_tensor->Resize(space_shape); // space_tensor->Resize(space_shape);
// std::vector<float> space_data(space_size, 0.0); // std::vector<float> space_data(space_size, 0.0);
...@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { ...@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// VLOG(0) << "open space file failed"; // VLOG(0) << "open space file failed";
// } // }
// //
// auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), // auto batch_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v())); // DataTypeToEnum<float>::v()));
// std::vector<float> batch_data(batch_size, 0.0); // std::vector<float> batch_data(batch_size, 0.0);
// batch_tensor->Resize(batch_shape); // batch_tensor->Resize(batch_shape);
...@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { ...@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// } // }
// Tensor::MappingGuard batch_mapper(batch_tensor.get()); // Tensor::MappingGuard batch_mapper(batch_tensor.get());
// float *batch_ptr = batch_tensor->mutable_data<float>(); // float *batch_ptr = batch_tensor->mutable_data<float>();
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) == batch_data.size()); // MACE_CHECK(static_cast<size_t>(batch_tensor->size()) ==
// batch_data.size());
// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float)); // memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
// } // }
// //
...@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { ...@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// {2, 2, 2, 2}, // {2, 2, 2, 2},
// space_tensor.get()); // space_tensor.get());
//} //}
...@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) { ...@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) {
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
namespace { namespace {
static void UpdateOp(mace::OperatorDef &op, void UpdateOp(mace::OperatorDef &op,
const std::string &name, const std::string &name,
const std::string &type, const std::string &type,
const std::vector<std::string> &inputs, const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs, const std::vector<std::string> &outputs,
const std::vector<mace::DataType> &output_types) { const std::vector<mace::DataType> &output_types) {
op.set_name(name); op.set_name(name);
op.set_type(type); op.set_type(type);
op.set_input(inputs); op.set_input(inputs);
......
...@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//} ...@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
BAZEL_BIN_PATH=bazel-bin/$BAZEL_BIN_PATH BAZEL_BIN_PATH=bazel-bin/$BAZEL_BIN_PATH
BIN_NAME=`echo $BAZEL_TARGET | cut -d: -f2` BIN_NAME=`echo $BAZEL_TARGET | cut -d: -f2`
ANDROID_ABI=armeabi-v7a
ANDROID_ABI=arm64-v8a ANDROID_ABI=arm64-v8a
STRIP="" ANDROID_ABI=armeabi-v7a
STRIP="--strip always" STRIP="--strip always"
VLOG_LEVEL=0 VLOG_LEVEL=0
PROFILINE="--define profiling=true" PROFILINE="--define profiling=true"
...@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \ ...@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top=//external:android/crosstool \ --crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=$ANDROID_ABI \ --cpu=$ANDROID_ABI \
--define neon=true --define neon=false
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
exit 1 exit 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册