Commit baf2dcd1, authored by Liangliang He

Resolve the static-variable registration issue for operators and allocators

Parent: faadb474
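
Note on the change: registering operators and allocators through file-scope static objects spread across static libraries is fragile — the linker can drop whole translation units whose only purpose is registration, and static initialization order across files is unspecified. This commit moves to explicit registration: MaceEngine owns an OperatorRegistry whose constructor calls each op's Register_* hook, and all allocator registrations live in allocator.cc. A minimal standalone sketch of the two patterns (illustrative names only, not MACE code):

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>

struct Op { virtual ~Op() = default; };
struct FooOp : Op {};

class Registry {
 public:
  using Creator = std::function<std::unique_ptr<Op>()>;
  void Register(const std::string &key, Creator creator) {
    creators_[key] = std::move(creator);
  }
  std::unique_ptr<Op> Create(const std::string &key) const {
    return creators_.at(key)();
  }
 private:
  std::map<std::string, Creator> creators_;
};

// Fragile pattern replaced by this commit: a file-scope static registerer in a
// static library may be discarded by the linker and its construction order
// relative to a global registry is unspecified, e.g.
//   static bool kFooRegistered = (GlobalRegistry().Register("Foo", ...), true);

// Pattern used by this commit: registration happens in a constructor that is
// called explicitly, so nothing depends on static initialization order.
class OwnedRegistry : public Registry {
 public:
  OwnedRegistry() {
    Register("Foo", [] { return std::unique_ptr<Op>(new FooOp()); });
  }
};

int main() {
  OwnedRegistry registry;                        // built explicitly, e.g. by MaceEngine
  std::unique_ptr<Op> op = registry.Create("Foo");  // deterministic, no linker surprises
}
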
......@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_profiling_enabled", "if_embed_binary_program")
cc_library(
name = "opencl_runtime",
name = "core",
srcs = glob([
"*.cc",
"runtime/opencl/*.cc",
]),
hdrs = glob([
"*.h",
"public/*.h",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.h",
]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]) +
if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]),
linkopts = ["-ldl"],
linkopts = if_android(["-pie", "-ldl"]),
deps = [
":core",
"//mace/utils:utils_hdrs",
"//mace/utils:logging",
"//mace/utils:tuner",
"@opencl_headers//:opencl20_headers",
],
alwayslink = 1,
)
cc_library(
name = "core",
srcs = glob(["*.cc"]),
hdrs = glob(["*.h", "public/*.h"]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android(["-pie"]),
deps = [
"//mace/utils:utils_hdrs",
"//mace/utils:logging",
],
)
cc_library(
......
......@@ -3,6 +3,7 @@
//
#include "mace/core/allocator.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
namespace mace {
......@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
} // namespace mace
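
Note: the CPU, NEON, and OPENCL allocator registrations now all sit at namespace scope in allocator.cc, which is always linked as part of //mace/core. A standalone sketch of the registerer-object pattern such a namespace-scope macro relies on (the names below are illustrative; the actual MACE_REGISTER_ALLOCATOR expansion is not shown in this diff):

#include <map>

enum class ExampleDeviceType { CPU, NEON, OPENCL };
struct ExampleAllocator {};

std::map<ExampleDeviceType, ExampleAllocator *> &ExampleAllocatorMap() {
  // A function-local static avoids the static initialization order problem.
  static std::map<ExampleDeviceType, ExampleAllocator *> allocator_map;
  return allocator_map;
}

struct ExampleAllocatorRegisterer {
  ExampleAllocatorRegisterer(ExampleDeviceType type, ExampleAllocator *allocator) {
    ExampleAllocatorMap()[type] = allocator;
  }
};

// Roughly what a namespace-scope call like
//   MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
// boils down to: a static object whose constructor inserts the allocator when
// allocator.cc is initialized.
static ExampleAllocatorRegisterer g_example_cpu_registerer(ExampleDeviceType::CPU,
                                                           new ExampleAllocator());

Keeping all three registrations in allocator.cc also lets the duplicate OPENCL registration in opencl_allocator.cc be removed further down in this commit.
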
......@@ -5,6 +5,7 @@
#include "mace/core/public/mace.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
......@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const {
// Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type):
device_type_(device_type), ws_(new Workspace()), net_(nullptr) {
op_registry_(new OperatorRegistry()), device_type_(device_type),
ws_(new Workspace()), net_(nullptr) {
ws_->LoadModelTensor(*net_def, device_type);
// Init model
auto net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT);
auto net = CreateNet(op_registry_, *net_def, ws_.get(),
device_type, NetMode::INIT);
if(!net->Run()) {
LOG(FATAL) << "Net init run failed";
}
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
net_ = std::move(CreateNet(*net_def, ws_.get(), device_type));
net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
}
MaceEngine::~MaceEngine() = default;
bool MaceEngine::Run(const float *input,
......
......@@ -3,22 +3,24 @@
//
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/utils.h"
namespace mace {
NetBase::NetBase(const std::shared_ptr<const NetDef> &net_def,
NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type)
: name_(net_def->name()) {}
: op_registry_(op_registry), name_(net_def->name()) {}
SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
SimpleNet::SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type,
const NetMode mode)
: NetBase(net_def, ws, type), device_type_(type){
: NetBase(op_registry, net_def, ws, type),
device_type_(type) {
VLOG(1) << "Constructing SimpleNet " << net_def->name();
for (int idx = 0; idx < net_def->op_size(); ++idx) {
const auto &operator_def = net_def->op(idx);
......@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
<< operator_def.type();
std::unique_ptr<OperatorBase> op{nullptr};
OperatorDef temp_def(operator_def);
op = CreateOperator(temp_def, ws, type, mode);
op = op_registry->CreateOperator(temp_def, ws, type, mode);
if (op) {
operators_.emplace_back(std::move(op));
}
......@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
}
if (run_metadata != nullptr) {
OperatorStats op_stats = { op->debug_def().name(),
op->debug_def().type(),
call_stats };
OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(),
call_stats};
run_metadata->op_stats.emplace_back(op_stats);
}
......@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
return true;
}
unique_ptr<NetBase> CreateNet(const NetDef &net_def,
std::unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const OperatorRegistry> op_registry,
const NetDef &net_def,
Workspace *ws,
DeviceType type,
const NetMode mode) {
std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
return CreateNet(tmp_net_def, ws, type, mode);
return CreateNet(op_registry, tmp_net_def, ws, type, mode);
}
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef> &net_def,
std::unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type,
const NetMode mode) {
unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type, mode));
unique_ptr<NetBase> net(new SimpleNet(op_registry, net_def, ws, type, mode));
return net;
}
......
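
Note: CreateNet now takes the operator registry explicitly instead of consulting a process-wide one. A call-site sketch based on the signatures in this hunk (building the NetDef and Workspace is out of scope here):

#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"

bool RunWithExplicitRegistry(const mace::NetDef &net_def, mace::Workspace *ws) {
  std::shared_ptr<mace::OperatorRegistry> op_registry(new mace::OperatorRegistry());
  // Run the INIT-mode net once (e.g. constant transformations), as MaceEngine does.
  auto init_net = mace::CreateNet(op_registry, net_def, ws,
                                  mace::DeviceType::OPENCL, mace::NetMode::INIT);
  if (!init_net->Run()) return false;
  // Then build and run the normal net with the same registry.
  auto net = mace::CreateNet(op_registry, net_def, ws, mace::DeviceType::OPENCL);
  return net->Run();
}
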
......@@ -6,6 +6,7 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/public/mace.h"
namespace mace {
......@@ -16,7 +17,8 @@ class Workspace;
class NetBase {
public:
NetBase(const std::shared_ptr<const NetDef> &net_def,
NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type);
virtual ~NetBase() noexcept {}
......@@ -27,13 +29,15 @@ class NetBase {
protected:
string name_;
const std::shared_ptr<const OperatorRegistry> op_registry_;
DISABLE_COPY_AND_ASSIGN(NetBase);
};
class SimpleNet : public NetBase {
public:
SimpleNet(const std::shared_ptr<const NetDef> &net_def,
SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type,
const NetMode mode = NetMode::NORMAL);
......@@ -47,11 +51,15 @@ class SimpleNet : public NetBase {
DISABLE_COPY_AND_ASSIGN(SimpleNet);
};
unique_ptr<NetBase> CreateNet(const NetDef &net_def,
std::unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const OperatorRegistry> op_registry,
const NetDef &net_def,
Workspace *ws,
DeviceType type,
const NetMode mode = NetMode::NORMAL);
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef> &net_def,
std::unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type,
const NetMode mode = NetMode::NORMAL);
......
......@@ -2,12 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <sstream>
#include "mace/core/operator.h"
namespace mace {
OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
: operator_ws_(ws),
operator_def_(std::make_shared<OperatorDef>(operator_def)) {}
OpKeyBuilder::OpKeyBuilder(const char *op_name): op_name_(op_name) {}
OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {}
OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { device_type_ = device; }
OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const DataType allowed) {
......@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const std::string OpKeyBuilder::Build() {
static const std::vector<std::string> type_order = {"T"};
std::string key = op_name_;
std::stringstream ss;
ss << op_name_;
ss << device_type_;
for (auto type : type_order) {
key += type + "_" + DataTypeToString(type_constraint_[type]);
ss << type << "_" << DataTypeToString(type_constraint_[type]);
}
return key;
}
std::map<int32_t, OperatorRegistry *> *gDeviceTypeRegistry() {
static std::map<int32_t, OperatorRegistry *> g_device_type_registry;
return &g_device_type_registry;
return ss.str();
}
MACE_DEFINE_REGISTRY(CPUOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
MACE_DEFINE_REGISTRY(NEONOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
MACE_DEFINE_REGISTRY(OPENCLOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
MACE_REGISTER_DEVICE_TYPE(DeviceType::OPENCL, OPENCLOperatorRegistry);
unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
std::unique_ptr<OperatorBase> OperatorRegistry::CreateOperator(
const OperatorDef &operator_def,
Workspace *ws,
DeviceType type,
const NetMode mode) {
OperatorRegistry *registry = gDeviceTypeRegistry()->at(type);
const int dtype = ArgumentHelper::GetSingleArgument<OperatorDef, int>(operator_def,
"T",
static_cast<int>(DT_FLOAT));
const int op_mode_i= ArgumentHelper::GetSingleArgument<OperatorDef, int>(operator_def,
"mode",
static_cast<int>(NetMode::NORMAL));
const NetMode mode) const {
const int dtype = ArgumentHelper::GetSingleArgument<OperatorDef, int>(
operator_def, "T", static_cast<int>(DT_FLOAT));
const int op_mode_i = ArgumentHelper::GetSingleArgument<OperatorDef, int>(
operator_def, "mode", static_cast<int>(NetMode::NORMAL));
const NetMode op_mode = static_cast<NetMode>(op_mode_i);
if (op_mode == mode) {
return registry->Create(OpKeyBuilder(operator_def.type().data())
return registry_.Create(
OpKeyBuilder(operator_def.type().data())
.Device(type)
.TypeConstraint("T", static_cast<DataType>(dtype))
.Build(),
operator_def,
ws);
operator_def, ws);
} else {
return nullptr;
}
}
OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
: operator_ws_(ws),
operator_def_(std::make_shared<OperatorDef>(operator_def)) {}
extern void Register_AddN(OperatorRegistry *op_registry);
extern void Register_BatchNorm(OperatorRegistry *op_registry);
extern void Register_BatchToSpaceND(OperatorRegistry *op_registry);
extern void Register_BiasAdd(OperatorRegistry *op_registry);
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ChannelShuffle(OperatorRegistry *op_registry);
extern void Register_Concat(OperatorRegistry *op_registry);
extern void Register_Conv2D(OperatorRegistry *op_registry);
extern void Register_DepthwiseConv2d(OperatorRegistry *op_registry);
extern void Register_FusedConv2D(OperatorRegistry *op_registry);
extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
extern void Register_Pooling(OperatorRegistry *op_registry);
extern void Register_Relu(OperatorRegistry *op_registry);
extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
extern void Register_SpaceToBatchND(OperatorRegistry *op_registry);
OperatorRegistry::OperatorRegistry() {
Register_AddN(this);
Register_BatchNorm(this);
Register_BatchToSpaceND(this);
Register_BiasAdd(this);
Register_BufferToImage(this);
Register_ChannelShuffle(this);
Register_Concat(this);
Register_Conv2D(this);
Register_DepthwiseConv2d(this);
Register_FusedConv2D(this);
Register_GlobalAvgPooling(this);
Register_ImageToBuffer(this);
Register_Pooling(this);
Register_Relu(this);
Register_ResizeBilinear(this);
Register_SpaceToBatchND(this);
}
} // namespace mace
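
Note: under this scheme each op exposes a Register_<OpName>(OperatorRegistry*) hook and the registry constructor calls every hook explicitly, so wiring up a new operator takes two edits rather than a file-scope static registerer. A sketch for a hypothetical operator MyOp<D, T> (the operator class itself is assumed to exist under mace/ops; it is not part of this diff):

// In a hypothetical mace/ops/my_op.cc: expose the registration hook.
namespace mace {

void Register_MyOp(OperatorRegistry *op_registry) {
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::CPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    MyOp<DeviceType::CPU, float>);
}

}  // namespace mace

// In mace/core/operator.cc: declare the hook next to the existing extern
// declarations and call it from the constructor:
//   extern void Register_MyOp(OperatorRegistry *op_registry);
//   ...
//   Register_MyOp(this);
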
......@@ -5,13 +5,13 @@
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/common.h"
#include "mace/core/arg_helper.h"
#include "mace/core/common.h"
#include "mace/core/future.h"
#include "mace/core/public/mace.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/public/mace.h"
namespace mace {
......@@ -122,29 +122,12 @@ class Operator : public OperatorBase {
#define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>
OperatorRegistry;
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *> *(
*RegistryFunction)();
std::map<int32_t, OperatorRegistry *> *gDeviceTypeRegistry();
struct DeviceTypeRegisterer {
explicit DeviceTypeRegisterer(int32_t type, RegistryFunction func) {
if (gDeviceTypeRegistry()->count(type)) {
LOG(ERROR) << "Device type " << type
<< "registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?";
std::exit(1);
}
// Calling the registry function to get the actual registry pointer.
gDeviceTypeRegistry()->emplace(type, func());
}
};
class OpKeyBuilder {
public:
explicit OpKeyBuilder(const char *op_name);
OpKeyBuilder &Device(DeviceType device);
OpKeyBuilder &TypeConstraint(const char *attr_name, const DataType allowed);
template <typename T>
......@@ -154,6 +137,7 @@ class OpKeyBuilder {
private:
std::string op_name_;
DeviceType device_type_;
std::map<std::string, DataType> type_constraint_;
};
......@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
return this->TypeConstraint(attr_name, DataTypeToEnum<T>::value);
}
class OperatorRegistry {
public:
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>
RegistryType;
OperatorRegistry();
~OperatorRegistry() = default;
RegistryType *registry() { return &registry_; };
std::unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
Workspace *ws,
DeviceType type,
const NetMode mode) const;
private:
RegistryType registry_;
DISABLE_COPY_AND_ASSIGN(OperatorRegistry);
};
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \
type, &registry_function); \
}
MACE_DECLARE_REGISTRY(CPUOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY(NEONOperatorRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY(OPENCLOperatorRegistry,
MACE_DECLARE_REGISTRY(OpRegistry,
OperatorBase,
const OperatorDef &,
Workspace *);
#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_OPENCL_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__)
unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
Workspace *ws,
DeviceType type,
const NetMode mode);
#define REGISTER_OPERATOR(op_registry, name, ...) \
MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__)
} // namespace mace
......
......@@ -302,10 +302,12 @@ class NetDef {
class Workspace;
class NetBase;
class OperatorRegistry;
class MaceEngine {
public:
explicit MaceEngine(const NetDef *net_def, DeviceType device_type);
explicit MaceEngine(const NetDef *net_def,
DeviceType device_type);
~MaceEngine();
bool Run(const float *input,
const std::vector<int64_t> &input_shape,
......@@ -314,6 +316,7 @@ class MaceEngine {
MaceEngine &operator=(const MaceEngine&) = delete;
private:
std::shared_ptr<OperatorRegistry> op_registry_;
DeviceType device_type_;
std::unique_ptr<Workspace> ws_;
std::unique_ptr<NetBase> net_;
......
......@@ -17,24 +17,27 @@ class Registry {
Registry() : registry_() {}
void Register(const SrcType &key, Creator creator) {
VLOG(2) << "Registering: " << key;
std::lock_guard<std::mutex> lock(register_mutex_);
MACE_CHECK(registry_.count(key) == 0, "Key already registered.");
registry_[key] = creator;
}
inline bool Has(const SrcType &key) { return registry_.count(key) != 0; }
inline bool Has(const SrcType &key) const {
return registry_.count(key) != 0;
}
unique_ptr<ObjectType> Create(const SrcType &key, Args... args) {
unique_ptr<ObjectType> Create(const SrcType &key, Args... args) const {
if (registry_.count(key) == 0) {
LOG(FATAL) << "Key not registered: " << key;
}
return registry_[key](args...);
return registry_.at(key)(args...);
}
/**
* Returns the keys currently registered as a vector.
*/
vector<SrcType> Keys() {
vector<SrcType> Keys() const {
vector<SrcType> keys;
for (const auto &it : registry_) {
keys.push_back(it.first);
......@@ -77,39 +80,31 @@ class Registerer {
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
/*
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
}
*/
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
/*
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
*/
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, RegistryName(), __VA_ARGS__);
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \
Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \
key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__)
} // namespace mace
......
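
Note: the Registry template now guards Register() with a mutex and makes Has()/Create()/Keys() const, so a shared const OperatorRegistry can be used on the lookup path. A minimal standalone analogue of that interface, under the assumption that Creator is a plain function pointer returning unique_ptr<ObjectType> from Args... (illustration only):

#include <map>
#include <memory>
#include <mutex>
#include <string>

template <class ObjectType, class... Args>
class MiniRegistry {
 public:
  using Creator = std::unique_ptr<ObjectType> (*)(Args...);

  void Register(const std::string &key, Creator creator) {
    std::lock_guard<std::mutex> lock(register_mutex_);  // same guard this commit adds
    registry_[key] = creator;
  }
  bool Has(const std::string &key) const { return registry_.count(key) != 0; }
  std::unique_ptr<ObjectType> Create(const std::string &key, Args... args) const {
    return registry_.at(key)(args...);  // at() keeps Create() const, as in the hunk
  }

 private:
  std::map<std::string, Creator> registry_;
  std::mutex register_mutex_;
};

// Example use:
struct Widget { int value; };
static std::unique_ptr<Widget> MakeWidget(int value) {
  return std::unique_ptr<Widget>(new Widget{value});
}

int main() {
  MiniRegistry<Widget, int> registry;
  registry.Register("widget", &MakeWidget);
  std::unique_ptr<Widget> w = registry.Create("widget", 42);  // w->value == 42
}
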
......@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
bool OpenCLAllocator::OnHost() { return false; }
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
} // namespace mace
......@@ -105,7 +105,8 @@ class Tensor {
inline index_t dim_size() const { return shape_.size(); }
inline index_t dim(unsigned int index) const {
MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
MACE_CHECK(index < shape_.size(), "Dim out of range: ",
index, " >= ", shape_.size());
return shape_[index];
}
......
......@@ -11,7 +11,6 @@ cc_binary(
deps = [
"//mace/core",
"//mace/ops",
"//mace/core:opencl_runtime",
],
)
......
......@@ -26,7 +26,6 @@ cc_library(
linkopts = if_android(["-lm"]),
deps = [
"//mace/core",
"//mace/core:opencl_runtime",
"//mace/utils:utils_hdrs",
],
)
......
......@@ -6,26 +6,32 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN")
void Register_AddN(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
AddNOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
OpsTestNet net;
// Add input data
for (int i = 0; i < inputs; ++i) {
net.AddRandomInput<D, float>(
internal::MakeString("Input", i).c_str(), {n, h, w, c});
net.AddRandomInput<D, float>(internal::MakeString("Input", i).c_str(),
{n, h, w, c});
}
if (D == DeviceType::OPENCL) {
......
......@@ -6,26 +6,32 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm")
void Register_BatchNorm(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
BatchNormOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -23,11 +23,16 @@ static void BatchNorm(
net.AddRandomInput<D, T>("Var", {channels}, true);
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("InputImage")
.Input("ScaleImage")
......@@ -37,8 +42,7 @@ static void BatchNorm(
.AddFloatArg("epsilon", 1e-3)
.Output("Output")
.Finalize(net.NewOperatorDef());
}
else {
} else {
OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("Input")
.Input("Scale")
......@@ -50,7 +54,6 @@ static void BatchNorm(
.Finalize(net.NewOperatorDef());
}
// tuning
setenv("MACE_TUNING", "1", 1);
net.RunOp(D);
......@@ -81,7 +84,6 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM(1, 1, 512, 512, float);
......
......@@ -22,11 +22,16 @@ void Simple() {
net.AddInputFromArray<D, float>("Var", {1}, {11.67f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage")
......@@ -41,7 +46,8 @@ void Simple() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("Input")
......@@ -64,9 +70,7 @@ void Simple() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2);
}
TEST_F(BatchNormOpTest, SimpleCPU) {
Simple<DeviceType::CPU>();
}
TEST_F(BatchNormOpTest, SimpleCPU) { Simple<DeviceType::CPU>(); }
/*
TEST_F(BatchNormOpTest, SimpleNEON) {
......@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) {
}
*/
TEST_F(BatchNormOpTest, SimpleOPENCL) {
Simple<DeviceType::OPENCL>();
}
TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::OPENCL>(); }
/*
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
......@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
......@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
......@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
......@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage")
......@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
}
......@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
......@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage")
......@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
}
......@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
......@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage")
......@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
}
......@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
......@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("InputImage")
......@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
}
}
......@@ -6,13 +6,17 @@
namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND")
void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -14,7 +14,8 @@ static void BMBatchToSpace(
OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -36,7 +37,8 @@ static void BMBatchToSpace(
}
#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \
static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
static void \
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
......
......@@ -6,28 +6,34 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("BiasAdd")
void Register_BiasAdd(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::CPU, float>);
/*
#if __ARM_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd")
/*
#if __ARM_NEON
REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
*/
#endif // __ARM_NEON
*/
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
BiasAddOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -9,8 +9,7 @@
namespace mace {
template <DeviceType D, typename T>
static void BiasAdd(
int iters, int batch, int channels, int height, int width) {
static void BiasAdd(int iters, int batch, int channels, int height, int width) {
mace::testing::StopTiming();
OpsTestNet net;
......@@ -20,15 +19,16 @@ static void BiasAdd(
net.AddRandomInput<D, T>("Bias", {channels}, true);
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("InputImage")
.Input("BiasImage")
.Output("Output")
.Finalize(net.NewOperatorDef());
}
else {
} else {
OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("Input")
.Input("Bias")
......
......@@ -19,8 +19,10 @@ void BiasAddSimple() {
net.AddInputFromArray<D, float>("Bias", {1}, {0.5f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage")
......@@ -31,7 +33,8 @@ void BiasAddSimple() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("Input")
......@@ -43,16 +46,14 @@ void BiasAddSimple() {
}
// Check
auto expected =
CreateTensor<float>({1, 6, 2, 1}, {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5,
11.5, 13.5, 13.5, 15.5, 15.5});
auto expected = CreateTensor<float>(
{1, 6, 2, 1},
{5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-2);
}
TEST_F(BiasAddOpTest, BiasAddSimpleCPU) {
BiasAddSimple<DeviceType::CPU>();
}
TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple<DeviceType::CPU>(); }
TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) {
BiasAddSimple<DeviceType::OPENCL>();
......@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
// run cpu
......@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage")
......@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
}
......@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
// run cpu
......@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest")
.Input("InputImage")
......@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
}
}
......@@ -6,14 +6,18 @@
namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage")
void Register_BufferToImage(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
BufferToImageOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
BufferToImageOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -6,9 +6,12 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("ChannelShuffle")
void Register_ChannelShuffle(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
ChannelShuffleOp<DeviceType::CPU, float>);
}
} // namespace mace
......@@ -23,7 +23,8 @@ static void ChannelShuffle(
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up
for (int i = 0; i < 5; ++i) {
......
......@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
.AddIntArg("group", 4)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
"Input", {1, 8, 1, 2},
......
......@@ -6,21 +6,28 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
void Register_Concat(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::CPU, float>);
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::CPU, half>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters,
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT);
OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0")
.Input("InputImage1")
......@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters,
net.RunOp(DeviceType::OPENCL);
}
const int64_t tot = static_cast<int64_t>(iters) *
const int64_t tot =
static_cast<int64_t>(iters) *
(net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size());
mace::testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(T));
......
......@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) {
for (int i = 0; i < num_inputs; ++i) {
builder = builder.Input(("Input" + ToString(i)).c_str());
}
builder.AddIntArg("axis", axis).Output("Output").Finalize(net.NewOperatorDef());
builder.AddIntArg("axis", axis)
.Output("Output")
.Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim);
......@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
concat_axis_size += input_shapes[i][axis];
GenerateRandomRealTypeData(input_shapes[i], inputs[i]);
input_ptrs[i] = inputs[i].data();
net.AddInputFromArray<DeviceType::CPU, float>(("Input" + ToString(i)).c_str(),
input_shapes[i], inputs[i]);
net.AddInputFromArray<DeviceType::CPU, float>(
("Input" + ToString(i)).c_str(), input_shapes[i], inputs[i]);
}
// Run
......@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) {
}
}
template<typename T>
template <typename T>
void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const int axis) {
srand(time(nullptr));
......@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const std::string input_name = ("Input" + ToString(i)).c_str();
const std::string image_name = ("InputImage" + ToString(i)).c_str();
concat_axis_size += shapes[i][axis];
net.AddRandomInput<DeviceType::OPENCL, float>(input_name,
shapes[i]);
BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name, kernels::BufferType::IN_OUT);
net.AddRandomInput<DeviceType::OPENCL, float>(input_name, shapes[i]);
BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name,
kernels::BufferType::IN_OUT);
}
auto builder = OpDefBuilder("Concat", "ConcatTest");
......@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
// Run
net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check
auto output = net.GetOutput("Output");
......@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
while (output_ptr != (output->data<float>() + output->size())) {
for (int i = 0; i < num_inputs; ++i) {
index_t num_elements =
std::accumulate(shapes[i].begin() + axis, shapes[i].end(),
1, std::multiplies<index_t>());
std::accumulate(shapes[i].begin() + axis, shapes[i].end(), 1,
std::multiplies<index_t>());
const std::string input_name = ("Input" + ToString(i)).c_str();
const Tensor *input_tensor = net.GetTensor(input_name.data());
Tensor::MappingGuard input_guard(input_tensor);
const float *input_ptr = input_tensor->data<float>() + k * num_elements;
for (int j = 0; j < num_elements; ++j) {
EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) << "With index: " << i << ", " << j;
EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2)
<< "With index: " << i << ", " << j;
}
}
k++;
......@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
}
TEST_F(ConcatOpTest, OPENCLAligned) {
OpenclRandomTest<float>({
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
OpenclRandomTest<float>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
}
TEST_F(ConcatOpTest, OPENCLHalfAligned) {
OpenclRandomTest<half>({
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
OpenclRandomTest<half>({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3);
}
TEST_F(ConcatOpTest, OPENCLUnAligned) {
OpenclRandomTest<float>({
{3, 32, 32, 13},
{3, 32, 32, 17}
},
3);
OpenclRandomTest<float>({{3, 32, 32, 13}, {3, 32, 32, 17}}, 3);
}
......@@ -6,31 +6,38 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
void Register_Conv2D(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
Conv2dOp<DeviceType::CPU, float>);
REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::CPU, half>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
Conv2dOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -33,9 +33,12 @@ static void Conv2d(int iters,
net.AddRandomInput<D, float>("Bias", {output_channels});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
.Input("FilterImage")
......@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half);
// Test RGB <-> YUV
//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
// BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
// BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
//
//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments
//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad
// alignments
// BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
// BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
// BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
// BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
} // namespace mace
......@@ -10,7 +10,7 @@ using namespace mace;
class Conv2dOpTest : public OpsTestBase {};
template<DeviceType D>
template <DeviceType D>
void TestSimple3x3VALID() {
OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest")
......@@ -42,10 +42,9 @@ void TestSimple3x3VALID() {
auto expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
template<DeviceType D>
template <DeviceType D>
void TestSimple3x3SAME() {
OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest")
......@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) {
}
#endif
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3VALID() {
OpsTestNet net;
// Add input data
......@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
.Input("FilterImage")
......@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Conv2D", "Conv2dTest")
......@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3SAME() {
OpsTestNet net;
......@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
.Input("FilterImage")
......@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Conv2D", "Conv2dTest")
......@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
}
template<DeviceType D>
template <DeviceType D>
void TestSimple3x3WithoutBias() {
OpsTestNet net;
OpDefBuilder("Conv2D", "Conv2dTest")
......@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
#ifdef __ARM_NEON
TEST_F(Conv2dOpTest, NEONWithoutBias) {
TestSimple3x3WithoutBias<DeviceType::NEON>();
}
#endif
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3WithoutBias() {
OpsTestNet net;
......@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
......@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("Input")
......@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
}
template<DeviceType D>
template <DeviceType D>
static void TestCombined3x3() {
// Construct graph
OpsTestNet net;
......@@ -335,17 +344,13 @@ static void TestCombined3x3() {
4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
#ifdef __ARM_NEON
TEST_F(Conv2dOpTest, NEONCombined) {
TestCombined3x3<DeviceType::NEON>();
}
TEST_F(Conv2dOpTest, NEONCombined) { TestCombined3x3<DeviceType::NEON>(); }
#endif
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void TestNHWCCombined3x3() {
// Construct graph
OpsTestNet net;
......@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() {
net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage")
......@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() {
// Run
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("Input")
......@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() {
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
}
// Check
auto expected = CreateTensor<float>(
{1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f,
12.1f, 6.2f, 18.1f, 9.2f, 12.1f, 6.2f,
8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
{1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
}
TEST_F(Conv2dOpTest, CPUStride2) {
......@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) {
TestNHWCCombined3x3<DeviceType::OPENCL, float>();
}
template<DeviceType D>
template <DeviceType D>
void TestConv1x1() {
// Construct graph
OpsTestNet net;
......@@ -435,9 +441,12 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage")
......@@ -451,7 +460,8 @@ void TestConv1x1() {
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("Input")
......@@ -479,15 +489,11 @@ void TestConv1x1() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
TEST_F(Conv2dOpTest, CPUConv1x1) {
TestConv1x1<DeviceType::CPU>();
}
TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TEST_F(Conv2dOpTest, OPENCLConv1x1) {
TestConv1x1<DeviceType::OPENCL>();
}
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::OPENCL>(); }
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
......@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
......@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
};
......@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
.Finalize(net.NewOperatorDef());
std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data);
GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data);
std::vector<float> float_filter_data;
GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data);
GenerateRandomRealTypeData(
{kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data);
// Add input data
net.AddInputFromArray<D, float>("Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data);
"Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);
// run on cpu
......@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, half>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
......@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// Run on device
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
};
......
......@@ -7,7 +7,6 @@
namespace mace {
TEST(CoreTest, INIT_MODE) {
std::vector<OperatorDef> op_defs;
Workspace ws;
......@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) {
.Output("B2IOutput")
.AddIntArg("buffer_type", kernels::BufferType::FILTER)
.AddIntArg("mode", static_cast<int>(NetMode::INIT))
.Finalize(&op_defs[op_defs.size()-1]);
.Finalize(&op_defs[op_defs.size() - 1]);
Tensor *input =
ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<float>::v());
ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL),
DataTypeToEnum<float>::v());
input->Resize({1, 3, 3, 3});
{
Tensor::MappingGuard input_mapper(input);
......@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) {
.Input("B2IOutput")
.Output("Output")
.AddIntArg("buffer_type", kernels::BufferType::FILTER)
.Finalize(&op_defs[op_defs.size()-1]);
.Finalize(&op_defs[op_defs.size() - 1]);
NetDef net_def;
for (auto &op_def : op_defs) {
net_def.add_op()->CopyFrom(op_def);
}
auto net = CreateNet(net_def, &ws, DeviceType::OPENCL, NetMode::INIT);
std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());
auto net =
CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT);
net->Run();
EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr);
EXPECT_TRUE(ws.GetTensor("Output") == nullptr);
net = CreateNet(net_def, &ws, DeviceType::OPENCL);
net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL);
net->Run();
EXPECT_TRUE(ws.GetTensor("Output") != nullptr);
ExpectTensorNear<float>(*ws.GetTensor("Input"), *ws.GetTensor("Output"), 1e-5);
ExpectTensorNear<float>(*ws.GetTensor("Input"), *ws.GetTensor("Output"),
1e-5);
}
} // namespace mace
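The INIT_MODE test above now constructs an OperatorRegistry explicitly and threads the same shared instance through both CreateNet calls, so the INIT-mode operators (such as BufferToImage) and the normal inference net are built from one registry with a well-defined lifetime. A condensed usage sketch of that two-phase pattern follows; the CreateNet signature is inferred from these call sites rather than quoted from a header, and the header paths are the ones these tests already use.

// Sketch only: CreateNet's exact declaration is assumed from the call sites
// in this diff.
#include <memory>

#include "mace/core/net.h"        // CreateNet, NetBase, NetMode
#include "mace/core/operator.h"   // OperatorRegistry
#include "mace/core/workspace.h"  // Workspace

namespace mace {

void RunInitThenInference(const NetDef &net_def, Workspace *ws) {
  // One registry instance is shared by every net built from it.
  std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());

  // Phase 1: run only the INIT-mode operators (e.g. BufferToImage).
  auto init_net =
      CreateNet(op_registry, net_def, ws, DeviceType::OPENCL, NetMode::INIT);
  init_net->Run();

  // Phase 2: build and run the normal inference net with the same registry.
  auto net = CreateNet(op_registry, net_def, ws, DeviceType::OPENCL);
  net->Run();
}

}  // namespace mace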
......@@ -6,21 +6,26 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, float>);
}
} // namespace mace
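The hunk above shows the recurring pattern of this commit: the REGISTER_CPU_OPERATOR / REGISTER_NEON_OPERATOR / REGISTER_OPENCL_OPERATOR static-initializer macros become a plain Register_DepthwiseConv2d(OperatorRegistry *) function built on REGISTER_OPERATOR, so operator registration no longer depends on static-variable initialization order across translation units. Below is a minimal sketch of how such functions could be gathered in one place; the Register_* names come from this diff, but the constructor body is an assumption for illustration, not code from the commit.

// Sketch only: the aggregation point shown in the comments is hypothetical.
namespace mace {

class OperatorRegistry;  // real definition lives in mace/core/operator.h

// Per-operator registration functions introduced by this commit.
void Register_DepthwiseConv2d(OperatorRegistry *op_registry);
void Register_FusedConv2D(OperatorRegistry *op_registry);
void Register_Pooling(OperatorRegistry *op_registry);
void Register_Relu(OperatorRegistry *op_registry);

// Calling each Register_* function from a single well-defined place, such as
// the registry constructor, makes registration order deterministic:
// OperatorRegistry::OperatorRegistry() {
//   Register_DepthwiseConv2d(this);
//   Register_FusedConv2D(this);
//   Register_Pooling(this);
//   Register_Relu(this);
// }

}  // namespace mace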
......@@ -41,12 +41,9 @@ void SimpleValidTest() {
{196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
TEST_F(DepthwiseConv2dOpTest, SimpleCPU) {
SimpleValidTest<DeviceType::CPU>();
}
TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { SimpleValidTest<DeviceType::CPU>(); }
template <DeviceType D>
void TestNxNS12(const index_t height, const index_t width) {
......@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", {batch, input_channels, height, width});
net.AddRandomInput<D, float>("Filter", {multiplier, input_channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Input",
{batch, input_channels, height, width});
net.AddRandomInput<D, float>(
"Filter", {multiplier, input_channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {multiplier * input_channels});
// Run on device
net.RunOp(D);
......@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) {
func(kernel_size, kernel_size, stride, stride, SAME);
}
}
}
#if __ARM_NEON
......
......@@ -39,7 +39,7 @@ static void DepthwiseConv2d(int iters,
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels*channels});
net.AddRandomInput<D, float>("Bias", {output_channels * channels});
// Warm-up
for (int i = 0; i < 5; ++i) {
......@@ -70,7 +70,6 @@ static void DepthwiseConv2d(int iters,
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_DEPTHWISE_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 2, float);
......
......@@ -6,25 +6,30 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D")
void Register_FusedConv2D(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
FusedConv2dOp<DeviceType::CPU, float>);
REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
FusedConv2dOp<DeviceType::CPU, half>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -9,7 +9,7 @@ using namespace mace;
class FusedConv2dOpTest : public OpsTestBase {};
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3VALID() {
OpsTestNet net;
// Add input data
......@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
.Input("FilterImage")
......@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
......@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3SAME() {
OpsTestNet net;
......@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
.Input("FilterImage")
......@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
......@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() {
}
auto expected = CreateTensor<float>(
{1, 3, 3, 1},
{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
{1, 3, 3, 1}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 0.01);
}
......@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
void TestNHWCSimple3x3WithoutBias() {
OpsTestNet net;
......@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
......@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
net.RunOp(D);
// Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input")
......@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
}
template<DeviceType D>
template <DeviceType D>
void TestConv1x1() {
// Construct graph
OpsTestNet net;
......@@ -207,9 +217,12 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
......@@ -223,7 +236,8 @@ void TestConv1x1() {
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input")
......@@ -251,15 +265,11 @@ void TestConv1x1() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
TEST_F(FusedConv2dOpTest, CPUConv1x1) {
TestConv1x1<DeviceType::CPU>();
}
TEST_F(FusedConv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TEST_F(FusedConv2dOpTest, OPENCLConv1x1) {
TestConv1x1<DeviceType::OPENCL>();
}
TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::OPENCL>(); }
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
......@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
......@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
};
......@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::OPENCL, float>({107, 113, 5, 7});
}
template<DeviceType D>
template <DeviceType D>
static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
......@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
.Finalize(net.NewOperatorDef());
std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data);
GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data);
std::vector<float> float_filter_data;
GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data);
GenerateRandomRealTypeData(
{kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data);
// Add input data
net.AddInputFromArray<D, float>("Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data);
"Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, input_channels, output_channels},
float_filter_data);
net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);
// run on cpu
......@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, half>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, half>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
......@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.2);
};
......@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64});
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
const std::vector<index_t> &filter_shape) {
testing::internal::LogToStderr();
......@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage", kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Filter", "FilterImage",
kernels::BufferType::FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage")
......@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
// Run on device
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
};
......@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
}
TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32},
{7, 7, 3, 64});
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({32, 32}, {7, 7, 3, 64});
}
TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) {
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40},
{15, 1, 32, 64});
TestGeneralConvNxNS12<DeviceType::OPENCL, float>({40, 40}, {15, 1, 32, 64});
}
template<DeviceType D, typename T>
......
......@@ -6,16 +6,20 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
void Register_GlobalAvgPooling(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
GlobalAvgPoolingOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
}
} // namespace mace
......@@ -22,7 +22,8 @@ static void GlobalAvgPooling(
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up
for (int i = 0; i < 5; ++i) {
......
......@@ -6,14 +6,18 @@
namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer")
void Register_ImageToBuffer(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ImageToBufferOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ImageToBufferOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -10,9 +10,9 @@
#include "gtest/gtest.h"
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
......@@ -56,7 +56,8 @@ class OpDefBuilder {
return *this;
}
OpDefBuilder AddIntsArg(const std::string &name, const std::vector<int> &values) {
OpDefBuilder AddIntsArg(const std::string &name,
const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
......@@ -65,7 +66,8 @@ class OpDefBuilder {
return *this;
}
OpDefBuilder AddFloatsArg(const std::string &name, const std::vector<float> &values) {
OpDefBuilder AddFloatsArg(const std::string &name,
const std::vector<float> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
......@@ -94,7 +96,7 @@ class OpDefBuilder {
class OpsTestNet {
public:
OpsTestNet() {}
OpsTestNet() : op_registry_(new OperatorRegistry()) {};
template <DeviceType D, typename T>
void AddInputFromArray(const std::string &name,
......@@ -135,9 +137,10 @@ class OpsTestNet {
std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1);
if (DataTypeToEnum<T>::value == DT_HALF) {
std::generate(input_data, input_data + input->size(),
[&gen, &nd, positive] {
return half_float::half_cast<half>(positive ? std::abs(nd(gen)) : nd(gen));
std::generate(
input_data, input_data + input->size(), [&gen, &nd, positive] {
return half_float::half_cast<half>(positive ? std::abs(nd(gen))
: nd(gen));
});
} else {
std::generate(input_data, input_data + input->size(),
......@@ -160,7 +163,7 @@ class OpsTestNet {
for (auto &op_def_ : op_defs_) {
net_def.add_op()->CopyFrom(op_def_);
}
net_ = CreateNet(net_def, &ws_, device);
net_ = CreateNet(op_registry_, net_def, &ws_, device);
device_ = device;
return net_->Run();
}
......@@ -182,6 +185,7 @@ class OpsTestNet {
}
public:
std::shared_ptr<OperatorRegistry> op_registry_;
Workspace ws_;
std::vector<OperatorDef> op_defs_;
std::unique_ptr<NetBase> net_;
......@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
res.resize(size);
if (DataTypeToEnum<T>::value == DT_HALF) {
std::generate(res.begin(), res.end(), [&gen, &nd] { return half_float::half_cast<half>(nd(gen)); });
std::generate(res.begin(), res.end(),
[&gen, &nd] { return half_float::half_cast<half>(nd(gen)); });
} else {
std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); });
}
......@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
template <typename T>
unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape,
const std::vector<T> &data) {
unique_ptr<Tensor> res(new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v()));
unique_ptr<Tensor> res(
new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v()));
res->Resize(shape);
T *input_data = res->mutable_data<T>();
memcpy(input_data, data.data(), data.size() * sizeof(T));
......@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) {
template <typename T>
struct is_floating_point_type {
static const bool value =
std::is_same<T, float>::value || std::is_same<T, double>::value
|| std::is_same<T, half>::value;
static const bool value = std::is_same<T, float>::value ||
std::is_same<T, double>::value ||
std::is_same<T, half>::value;
};
template <typename T>
......@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) {
<< "y.shape [ " << ShapeToString(y) << "]";
}
template <typename EXP_TYPE, typename RES_TYPE, bool is_fp = is_floating_point_type<EXP_TYPE>::value>
template <typename EXP_TYPE,
typename RES_TYPE,
bool is_fp = is_floating_point_type<EXP_TYPE>::value>
struct Expector;
// Partial specialization for float and double.
......@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
}
}
}
};
template <typename T>
......@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
template <typename EXP_TYPE, typename RES_TYPE>
void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
static_assert(is_floating_point_type<EXP_TYPE>::value
&& is_floating_point_type<RES_TYPE>::value,
static_assert(is_floating_point_type<EXP_TYPE>::value &&
is_floating_point_type<RES_TYPE>::value,
"T is not a floating point type");
Expector<EXP_TYPE, RES_TYPE>::Near(x, y, abs_err);
}
......
......@@ -6,29 +6,36 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
void Register_Pooling(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::CPU, float>);
REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::CPU, half>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -35,7 +35,8 @@ static void Pooling(int iters,
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input",
{batch, channels, height, width});
// Warm-up
for (int i = 0; i < 5; ++i) {
......
......@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
TEST_F(PoolingOpTest, MAX_SAME) {
// Construct graph
OpsTestNet net;
......@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
template<DeviceType D>
template <DeviceType D>
static void SimpleMaxPooling3S2() {
// Construct graph
OpsTestNet net;
......@@ -134,7 +133,8 @@ static void SimpleMaxPooling3S2() {
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() {
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
// Run
OpDefBuilder("Pooling", "PoolingTest")
......@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) {
SimpleMaxPooling3S2<CPU>();
}
TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { SimpleMaxPooling3S2<CPU>(); }
TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) {
SimpleMaxPooling3S2<OPENCL>();
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void MaxPooling3S2(const std::vector<index_t> &input_shape,
const std::vector<int> strides,
Padding padding) {
......@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<T>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}
// TODO(chenghui) : there is a bug.
//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
// TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
// AlignedMaxPooling3S2<NEON>(Padding::VALID);
// AlignedMaxPooling3S2<NEON>(Padding::SAME);
//}
......@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
template<DeviceType D>
template <DeviceType D>
static void SimpleAvgPoolingTest() {
// Construct graph
OpsTestNet net;
......@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() {
"Input", {1, 2, 8, 1},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() {
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check
auto expected = CreateTensor<float>({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5});
......@@ -306,7 +308,7 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) {
SimpleAvgPoolingTest<OPENCL>();
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
static void AvgPoolingTest(const std::vector<index_t> &shape,
const std::vector<int> &kernels,
const std::vector<int> &strides,
......@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT);
ExpectTensorNear<float, T>(expected, *net.GetOutput("OPENCLOutput"), 0.01);
}
TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4},
Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 15, 15, 128}, {4, 4}, {4, 4},
Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) {
AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4},
Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME);
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME);
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::VALID);
AvgPoolingTest<OPENCL, half>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::SAME);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2},
Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {2, 2}, {2, 2},
Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) {
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::SAME);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8},
Padding::VALID);
AvgPoolingTest<OPENCL, float>({3, 31, 37, 128}, {8, 8}, {8, 8},
Padding::SAME);
}
......@@ -6,26 +6,32 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu")
void Register_Relu(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
ReluOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
ReluOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ReluOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ReluOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -19,7 +19,8 @@ static void ReluBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluBM")
.Input("InputImage")
......@@ -56,7 +57,7 @@ static void ReluBenchmark(
#define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\
BM_RELU_MACRO(N, C, H, W, TYPE, NEON); \
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU(1, 1, 512, 512, float);
......
......@@ -14,13 +14,13 @@ void TestSimple() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>("Input",
{2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4,
-3, 3, -2, 2, -1, 1, 0, 0});
net.AddInputFromArray<D, float>(
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest")
.Input("InputImage")
......@@ -31,7 +31,8 @@ void TestSimple() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Relu", "ReluTest")
.Input("Input")
......@@ -42,38 +43,30 @@ void TestSimple() {
net.RunOp(D);
}
auto expected = CreateTensor<float>({2, 2, 2, 2},
{0, 7, 0, 6, 0, 5, 0, 4,
0, 3, 0, 2, 0, 1, 0, 0});
auto expected = CreateTensor<float>(
{2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
TEST_F(ReluOpTest, CPUSimple) {
TestSimple<DeviceType::CPU>();
}
TEST_F(ReluOpTest, CPUSimple) { TestSimple<DeviceType::CPU>(); }
#if __ARM_NEON
TEST_F(ReluOpTest, NEONSimple) {
TestSimple<DeviceType::NEON>();
}
TEST_F(ReluOpTest, NEONSimple) { TestSimple<DeviceType::NEON>(); }
#endif
TEST_F(ReluOpTest, OPENCLSimple) {
TestSimple<DeviceType::OPENCL>();
}
TEST_F(ReluOpTest, OPENCLSimple) { TestSimple<DeviceType::OPENCL>(); }
template <DeviceType D>
void TestUnalignedSimple() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>("Input",
{1, 3, 2, 1},
{-7, 7, -6, 6, -5, 5});
net.AddInputFromArray<D, float>("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest")
.Input("InputImage")
......@@ -84,7 +77,8 @@ void TestUnalignedSimple() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Relu", "ReluTest")
.Input("Input")
......@@ -95,8 +89,7 @@ void TestUnalignedSimple() {
net.RunOp(D);
}
auto expected = CreateTensor<float>({1, 3, 2, 1},
{0, 7, 0, 6, 0, 5});
auto expected = CreateTensor<float>({1, 3, 2, 1}, {0, 7, 0, 6, 0, 5});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
......@@ -120,13 +113,13 @@ void TestSimpleReluX() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>("Input",
{2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4,
-3, 3, -2, 2, -1, 1, 0, 0});
net.AddInputFromArray<D, float>(
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest")
.Input("InputImage")
......@@ -138,7 +131,8 @@ void TestSimpleReluX() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Relu", "ReluTest")
.Input("Input")
......@@ -150,38 +144,31 @@ void TestSimpleReluX() {
net.RunOp(D);
}
auto expected = CreateTensor<float>({2, 2, 2, 2},
{0, 6, 0, 6, 0, 5, 0, 4,
0, 3, 0, 2, 0, 1, 0, 0});
auto expected = CreateTensor<float>(
{2, 2, 2, 2}, {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
TEST_F(ReluOpTest, CPUSimpleReluX) {
TestSimpleReluX<DeviceType::CPU>();
}
TEST_F(ReluOpTest, CPUSimpleReluX) { TestSimpleReluX<DeviceType::CPU>(); }
#if __ARM_NEON
TEST_F(ReluOpTest, NEONSimpleReluX) {
TestSimpleReluX<DeviceType::NEON>();
}
TEST_F(ReluOpTest, NEONSimpleReluX) { TestSimpleReluX<DeviceType::NEON>(); }
#endif
TEST_F(ReluOpTest, OPENCLSimpleReluX) {
TestSimpleReluX<DeviceType::OPENCL>();
}
TEST_F(ReluOpTest, OPENCLSimpleReluX) { TestSimpleReluX<DeviceType::OPENCL>(); }
template <DeviceType D>
void TestUnalignedSimpleReluX() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>("Input",
{1, 1, 7, 1},
net.AddInputFromArray<D, float>("Input", {1, 1, 7, 1},
{-7, 7, -6, 6, -5, 5, -4});
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("Relu", "ReluTest")
.Input("InputImage")
......@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() {
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
} else {
OpDefBuilder("Relu", "ReluTest")
.Input("Input")
......@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() {
net.RunOp(D);
}
auto expected = CreateTensor<float>({1, 1, 7, 1},
{0, 6, 0, 6, 0, 5, 0});
auto expected = CreateTensor<float>({1, 1, 7, 1}, {0, 6, 0, 6, 0, 5, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
......
......@@ -6,26 +6,32 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear")
void Register_ResizeBilinear(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
ResizeBilinearOp<DeviceType::CPU, float>);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
ResizeBilinearOp<DeviceType::NEON, float>);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ResizeBilinearOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ResizeBilinearOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -26,7 +26,8 @@ static void ResizeBilinearBenchmark(int iters,
net.AddInputFromArray<D, index_t>("OutSize", {2},
{output_height, output_width});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, T>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
.Input("InputImage")
.Input("OutSize")
......
......@@ -91,7 +91,8 @@ void TestRandomResizeBilinear() {
expected.Copy(*net.GetOutput("Output"));
if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
.Input("InputImage")
......@@ -102,7 +103,8 @@ void TestRandomResizeBilinear() {
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput",
kernels::BufferType::IN_OUT);
} else {
// TODO support NEON
}
......
......@@ -6,13 +6,17 @@
namespace mace {
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND")
void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND")
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, half>);
}
} // namespace mace
......@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -37,14 +38,16 @@ static void BMSpaceToBatch(
}
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
static void \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
BENCHMARK( \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
......
......@@ -2,23 +2,23 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <fstream>
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
#include <fstream>
using namespace mace;
template<DeviceType D>
template <DeviceType D>
void RunSpaceToBatch(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data,
const std::vector<int> &block_shape_data,
const std::vector<int> &padding_data,
const Tensor *expected) {
OpsTestNet net;
net.AddInputFromArray<D, float>(
"Input", input_shape, input_data);
net.AddInputFromArray<D, float>("Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
}
template<DeviceType D>
template <DeviceType D>
void RunBatchToSpace(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data,
const std::vector<int> &block_shape_data,
......@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
const Tensor *expected) {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>(
"Input", input_shape, input_data);
net.AddInputFromArray<D, float>("Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
BufferToImage<D, float>(net, "Input", "InputImage",
kernels::BufferType::IN_OUT);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage")
.Output("OutputImage")
......@@ -56,21 +57,21 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Run
net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
ImageToBuffer<D, float>(net, "OutputImage", "Output",
kernels::BufferType::IN_OUT);
// Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
}
template<typename T>
template <typename T>
void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
const std::vector<float> &space_data,
const std::vector<int> &block_data,
const std::vector<int> &padding_data,
const std::vector<index_t> &batch_shape,
const std::vector<float> &batch_data) {
auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
DataTypeToEnum<T>::v()));
auto space_tensor = unique_ptr<Tensor>(new Tensor(
GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<T>::v()));
space_tensor->Resize(space_shape);
{
Tensor::MappingGuard space_mapper(space_tensor.get());
......@@ -81,8 +82,8 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T));
}
auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
DataTypeToEnum<T>::v()));
auto batch_tensor = unique_ptr<Tensor>(new Tensor(
GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum<T>::v()));
batch_tensor->Resize(batch_shape);
{
Tensor::MappingGuard batch_mapper(batch_tensor.get());
......@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(T));
}
RunSpaceToBatch<DeviceType::OPENCL>(space_shape, space_data,
block_data,
padding_data,
batch_tensor.get());
RunSpaceToBatch<DeviceType::OPENCL>(space_shape, space_data, block_data,
padding_data, batch_tensor.get());
RunBatchToSpace<DeviceType::OPENCL>(batch_shape, batch_data,
block_data,
padding_data,
space_tensor.get());
RunBatchToSpace<DeviceType::OPENCL>(batch_shape, batch_data, block_data,
padding_data, space_tensor.get());
}
TEST(SpaceToBatchTest, SmallData) {
TestBidirectionalTransform<float>({1, 2, 2, 1},
{1, 2, 3, 4},
{2, 2},
{0, 0, 0, 0},
{4, 1, 1, 1},
{1, 2, 3, 4}
);
TestBidirectionalTransform<float>({1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2},
{0, 0, 0, 0}, {4, 1, 1, 1}, {1, 2, 3, 4});
}
TEST(SpaceToBatchTest, SmallDataWithOnePadding) {
TestBidirectionalTransform<float>({1, 2, 2, 1},
{1, 2, 3, 4},
{3, 3},
{1, 0, 1, 0},
{9, 1, 1, 1},
{0, 0, 0, 0, 1, 2, 0, 3, 4}
);
TestBidirectionalTransform<float>({1, 2, 2, 1}, {1, 2, 3, 4}, {3, 3},
{1, 0, 1, 0}, {9, 1, 1, 1},
{0, 0, 0, 0, 1, 2, 0, 3, 4});
}
TEST(SpaceToBatchTest, SmallDataWithTwoPadding) {
TestBidirectionalTransform<float>({1, 2, 2, 1},
{1, 2, 3, 4},
{2, 2},
{1, 1, 1, 1},
{4, 2, 2, 1},
{0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0}
);
TestBidirectionalTransform<float>(
{1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2}, {1, 1, 1, 1}, {4, 2, 2, 1},
{0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0});
}
TEST(SpaceToBatchTest, SmallDataWithLargeImage) {
TestBidirectionalTransform<float>({1, 2, 10, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
{2, 2},
{0, 0, 0, 0},
{4, 1, 5, 1},
{1, 3, 5, 7, 9,
2, 4, 6, 8, 10,
11, 13, 15, 17, 19,
12, 14, 16, 18, 20}
);
TestBidirectionalTransform<float>(
{1, 2, 10, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
{2, 2}, {0, 0, 0, 0}, {4, 1, 5, 1},
{1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 11, 13, 15, 17, 19, 12, 14, 16, 18, 20});
}
TEST(SpaceToBatchTest, MultiChannelData) {
TestBidirectionalTransform<float>({1, 2, 2, 3},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{2, 2},
{0, 0, 0, 0},
{4, 1, 1, 3},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
);
TestBidirectionalTransform<float>(
{1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 2},
{0, 0, 0, 0}, {4, 1, 1, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
}
TEST(SpaceToBatchTest, LargerMultiChannelData) {
TestBidirectionalTransform<float>({1, 4, 4, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2},
{0, 0, 0, 0},
{4, 2, 2, 1},
{1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16}
);
TestBidirectionalTransform<float>(
{1, 4, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2}, {0, 0, 0, 0}, {4, 2, 2, 1},
{1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16});
}
TEST(SpaceToBatchTest, MultiBatchData) {
TestBidirectionalTransform<float>({2, 2, 4, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2},
{0, 0, 0, 0},
{8, 1, 2, 1},
{1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16}
);
TestBidirectionalTransform<float>(
{2, 2, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{2, 2}, {0, 0, 0, 0}, {8, 1, 2, 1},
{1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16});
}
TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
TestBidirectionalTransform<float>({2, 2, 4, 2},
TestBidirectionalTransform<float>(
{2, 2, 4, 2},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
{2, 2},
{0, 0, 0, 0},
{8, 1, 2, 2},
{2, 2}, {0, 0, 0, 0}, {8, 1, 2, 2},
{1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16,
17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32}
);
17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32});
}
//TEST(SpaceTobatchTest, CompareTF) {
// TEST(SpaceTobatchTest, CompareTF) {
//
// const std::string space_file = "/data/local/tmp/test/input";
// const std::string batch_file = "/data/local/tmp/test/output";
// const std::vector<index_t> space_shape = {1, 256, 256, 32};
// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies<int>());
// const int space_size = std::accumulate(space_shape.begin(),
// space_shape.end(), 1, std::multiplies<int>());
// const std::vector<index_t> batch_shape = {4, 130, 130, 32};
// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies<int>());
// const int batch_size = std::accumulate(batch_shape.begin(),
// batch_shape.end(), 1, std::multiplies<int>());
//
// auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto space_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// space_tensor->Resize(space_shape);
// std::vector<float> space_data(space_size, 0.0);
......@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// VLOG(0) << "open space file failed";
// }
//
// auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto batch_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// std::vector<float> batch_data(batch_size, 0.0);
// batch_tensor->Resize(batch_shape);
......@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// }
// Tensor::MappingGuard batch_mapper(batch_tensor.get());
// float *batch_ptr = batch_tensor->mutable_data<float>();
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) == batch_data.size());
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) ==
// batch_data.size());
// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
// }
//
......@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// {2, 2, 2, 2},
// space_tensor.get());
//}
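For reference, the index mapping that these round-trip tests exercise can be written out on the host side. The sketch below is not part of this commit and not MACE's OpenCL kernel; it is a minimal reference assuming NHWC layout, a 2-D block, paddings read as {top, bottom, left, right}, and the output batch ordering implied by the expected data above (input batch index major, block offset minor). The helper name `SpaceToBatchRef` is hypothetical.

// Hypothetical reference sketch (not MACE code): NHWC space-to-batch with
// out_batch = in_batch * block_h * block_w + (offset_h * block_w + offset_w),
// matching the expected outputs hard-coded in the tests above.
#include <cstdint>
#include <vector>

std::vector<float> SpaceToBatchRef(const std::vector<float> &space,
                                   int64_t batch, int64_t height,
                                   int64_t width, int64_t channels,
                                   int block_h, int block_w,
                                   int pad_top, int pad_bottom,
                                   int pad_left, int pad_right) {
  const int64_t padded_h = height + pad_top + pad_bottom;
  const int64_t padded_w = width + pad_left + pad_right;
  const int64_t out_h = padded_h / block_h;   // assumes exact divisibility
  const int64_t out_w = padded_w / block_w;
  const int64_t out_batch = batch * block_h * block_w;
  std::vector<float> out(out_batch * out_h * out_w * channels, 0.f);
  for (int64_t b = 0; b < out_batch; ++b) {
    const int64_t in_b = b / (block_h * block_w);       // original batch index
    const int64_t block_idx = b % (block_h * block_w);  // block offset index
    const int64_t offset_h = block_idx / block_w;
    const int64_t offset_w = block_idx % block_w;
    for (int64_t h = 0; h < out_h; ++h) {
      for (int64_t w = 0; w < out_w; ++w) {
        // Position in the zero-padded space, mapped back to the original space.
        const int64_t sh = h * block_h + offset_h - pad_top;
        const int64_t sw = w * block_w + offset_w - pad_left;
        for (int64_t c = 0; c < channels; ++c) {
          const int64_t out_idx = ((b * out_h + h) * out_w + w) * channels + c;
          if (sh >= 0 && sh < height && sw >= 0 && sw < width) {
            const int64_t in_idx =
                ((in_b * height + sh) * width + sw) * channels + c;
            out[out_idx] = space[in_idx];
          }  // else: position falls in the zero padding, output stays 0
        }
      }
    }
  }
  return out;
}

Feeding the SmallDataWithTwoPadding inputs ({1, 2, 2, 1}, block {2, 2}, paddings {1, 1, 1, 1}) through this sketch reproduces the expected {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0}; BatchToSpace is the inverse of the same index mapping, which is why the tests can check both directions against one pair of tensors.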
......@@ -27,7 +27,7 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) {
#include "mace/core/public/mace.h"
namespace {
static void UpdateOp(mace::OperatorDef &op,
void UpdateOp(mace::OperatorDef &op,
const std::string &name,
const std::string &type,
const std::vector<std::string> &inputs,
......
......@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
BAZEL_BIN_PATH=bazel-bin/$BAZEL_BIN_PATH
BIN_NAME=`echo $BAZEL_TARGET | cut -d: -f2`
ANDROID_ABI=armeabi-v7a
ANDROID_ABI=arm64-v8a
STRIP=""
ANDROID_ABI=armeabi-v7a
STRIP="--strip always"
VLOG_LEVEL=0
PROFILINE="--define profiling=true"
......@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=$ANDROID_ABI \
--define neon=true
--define neon=false
if [ $? -ne 0 ]; then
exit 1
......