提交 cb871b06 编写于 作者: L Liangliang He

Add Neon device

上级 87134853
......@@ -12,8 +12,13 @@
namespace mace {
// 16 bytes = 32 * 4 (Neon)
// Alignment, in bytes, requested for buffers allocated with kMaceAlignment
// (see the memalign / posix_memalign calls in the CPU allocator).
#ifdef __ANDROID__
// 16 bytes = 128 bits = 4 x 32-bit lanes (Neon)
constexpr size_t kMaceAlignment = 16;
#else
// 32 bytes = 256 bits (AVX/AVX2; AVX-512 would need 64 bytes)
constexpr size_t kMaceAlignment = 32;
#endif
class Allocator {
public:
......@@ -41,25 +46,18 @@ class CPUAllocator: public Allocator {
void* data = nullptr;
#ifdef __ANDROID__
data = memalign(kMaceAlignment, nbytes);
#elif defined(_MSC_VER)
data = _aligned_malloc(nbytes, kMaceAlignment);
#else
CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0);
#endif
CHECK_NOTNULL(data);
// TODO(heliangliang) This should be avoided sometimes
memset(data, 0, nbytes);
return data;
}
// Releases a buffer; the deallocation call has to match the allocation call
// that produced it.
#ifdef _MSC_VER
// Memory obtained from _aligned_malloc must be returned with _aligned_free.
void Delete(void* data) {
_aligned_free(data);
}
#else
// Memory obtained from memalign / posix_memalign is released with free().
void Delete(void* data) {
free(data);
}
#endif
void CopyBytes(void* dst, const void* src, size_t size) {
memcpy(dst, src, size);
......@@ -80,6 +78,11 @@ struct DeviceContext<DeviceType::CPU> {
static Allocator* allocator() { return cpu_allocator(); }
};
// Device context for NEON. It hands out the same allocator as the CPU
// context, i.e. whatever cpu_allocator() returns.
template <>
struct DeviceContext<DeviceType::NEON> {
static Allocator* allocator() { return cpu_allocator(); }
};
Allocator* GetDeviceAllocator(DeviceType type);
} // namespace mace
......
......@@ -18,6 +18,13 @@ MACE_DEFINE_REGISTRY(
Workspace*);
MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
// Registry that holds the operator creators for the NEON device.
MACE_DEFINE_REGISTRY(
    NEONOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
// Bind the NEON device type to its own registry (not the CPU one), so that
// operators added through REGISTER_NEON_OPERATOR are the ones looked up when
// an operator is created for DeviceType::NEON.
MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
unique_ptr<OperatorBase> CreateOperator(
const OperatorDef& operator_def,
Workspace* ws,
......@@ -33,4 +40,4 @@ OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
}
} // namespace mace
\ No newline at end of file
} // namespace mace
......@@ -145,6 +145,17 @@ MACE_DECLARE_REGISTRY(
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
// Declaration of the NEON operator registry; NEON counterpart of the CPU
// registry and REGISTER_CPU_OPERATOR macro above.
MACE_DECLARE_REGISTRY(
NEONOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
// Forwards to MACE_REGISTER_CREATOR, registering a creator under `key` in
// NEONOperatorRegistry.
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
// Forwards to MACE_REGISTER_CLASS, registering an operator class under `name`
// in NEONOperatorRegistry, e.g.
//   REGISTER_NEON_OPERATOR(Relu, ReluOp<DeviceType::NEON, float>);
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
unique_ptr<OperatorBase> CreateOperator(
const OperatorDef &operator_def,
Workspace *ws,
......
......@@ -23,6 +23,23 @@ bool ReluOp<DeviceType::CPU, float>::Run() {
return true;
}
// Relu for float tensors on the NEON device: Y[i] = max(X[i], 0).
//
// Reads input 0, resizes output 0 with ResizeLike(input), and writes the
// clamped values into it. Always returns true.
//
// NOTE(review): this is still a plain scalar loop; no NEON intrinsics are
// used here yet.
template <>
bool ReluOp<DeviceType::NEON, float>::Run() {
  const Tensor* X = Input(0);
  Tensor* Y = Output(0);
  Y->ResizeLike(X);

  const float* Xdata = X->data<float>();
  float* Ydata = Y->mutable_data<float>();

  // Iterate over the pointer range instead of an `int` index, so the element
  // count is never narrowed, and keep logging out of the per-element path.
  std::transform(Xdata, Xdata + X->size(), Ydata,
                 [](float value) { return std::max(value, 0.f); });
  return true;
}
REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
REGISTER_NEON_OPERATOR(Relu, ReluOp<DeviceType::NEON, float>);
} // namespace mace
......@@ -3,8 +3,9 @@ syntax = "proto2";
package mace;
enum DeviceType {
CPU = 0; // By default, we will use CPU.
GPU = 1;
CPU = 0; // By default, we will use CPU.
NEON = 1;
OPENCL = 2;
}
enum DataType {
......@@ -70,4 +71,4 @@ message NetDef {
optional string version = 3;
repeated Argument arg = 4;
repeated TensorProto tensors = 5;
}
\ No newline at end of file
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册