From cb871b06e9397dcd43df4b376d4dc68c29a2d125 Mon Sep 17 00:00:00 2001
From: Liangliang He
Date: Fri, 1 Sep 2017 10:56:08 +0800
Subject: [PATCH] Add Neon device

Introduce NEON as a device type next to CPU:
- allocator.h: pick kMaceAlignment per platform (16 bytes on Android,
  32 bytes elsewhere), drop the _MSC_VER allocation path, and add a
  DeviceContext specialization that returns cpu_allocator().
- operator.{h,cc}: declare/define NEONOperatorRegistry with its
  REGISTER_NEON_OPERATOR* macros and register it for DeviceType::NEON.
- relu.cc: add a second ReluOp::Run() specialization and register Relu
  with the NEON registry.
- mace.proto: DeviceType becomes CPU = 0, NEON = 1, OPENCL = 2 (GPU = 1
  is removed).

---
 mace/core/allocator.h | 21 ++++++++++++---------
 mace/core/operator.cc |  9 ++++++++-
 mace/core/operator.h  | 11 +++++++++++
 mace/ops/relu.cc      | 17 +++++++++++++++++
 mace/proto/mace.proto |  7 ++++---
 5 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index 110b012b..79efd610 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -12,8 +12,13 @@
 
 namespace mace {
 
-// 16 bytes = 32 * 4 (Neon)
+#ifdef __ANDROID__
+// 16 bytes = 128 bits = 32 * 4 (Neon)
 constexpr size_t kMaceAlignment = 16;
+#else
+// 32 bytes = 256 bits (AVX/AVX2)
+constexpr size_t kMaceAlignment = 32;
+#endif
 
 class Allocator {
  public:
@@ -41,25 +46,18 @@ class CPUAllocator: public Allocator {
     void* data = nullptr;
 #ifdef __ANDROID__
     data = memalign(kMaceAlignment, nbytes);
-#elif defined(_MSC_VER)
-    data = _aligned_malloc(nbytes, kMaceAlignment);
 #else
     CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0);
 #endif
     CHECK_NOTNULL(data);
+    // TODO(heliangliang) This should be avoided sometimes
     memset(data, 0, nbytes);
     return data;
   }
 
-#ifdef _MSC_VER
-  void Delete(void* data) {
-    _aligned_free(data);
-  }
-#else
   void Delete(void* data) {
     free(data);
   }
-#endif
 
   void CopyBytes(void* dst, const void* src, size_t size) {
     memcpy(dst, src, size);
@@ -80,6 +78,11 @@ struct DeviceContext {
   static Allocator* allocator() { return cpu_allocator(); }
 };
 
+template <>
+struct DeviceContext {
+  static Allocator* allocator() { return cpu_allocator(); }
+};
+
 Allocator* GetDeviceAllocator(DeviceType type);
 
 } // namespace mace
diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index 0072b58a..078574a7 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -18,6 +18,13 @@ MACE_DEFINE_REGISTRY(
     Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
 
+MACE_DEFINE_REGISTRY(
+    NEONOperatorRegistry,
+    OperatorBase,
+    const OperatorDef&,
+    Workspace*);
+MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
+
 unique_ptr CreateOperator(
     const OperatorDef& operator_def,
     Workspace* ws,
@@ -33,4 +40,4 @@ OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
 
 }
 
-} // namespace mace
\ No newline at end of file
+} // namespace mace
diff --git a/mace/core/operator.h b/mace/core/operator.h
index 27e1fa16..6ac672f6 100644
--- a/mace/core/operator.h
+++ b/mace/core/operator.h
@@ -145,6 +145,17 @@ MACE_DECLARE_REGISTRY(
 #define REGISTER_CPU_OPERATOR(name, ...) \
   MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
 
+MACE_DECLARE_REGISTRY(
+    NEONOperatorRegistry,
+    OperatorBase,
+    const OperatorDef&,
+    Workspace*);
+
+#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
+  MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
+#define REGISTER_NEON_OPERATOR(name, ...) \
+  MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
+
 unique_ptr CreateOperator(
     const OperatorDef &operator_def,
     Workspace *ws,
diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc
index 94646e0f..66662f38 100644
--- a/mace/ops/relu.cc
+++ b/mace/ops/relu.cc
@@ -23,6 +23,23 @@ bool ReluOp::Run() {
   return true;
 }
 
+template <>
+bool ReluOp::Run() {
+  const Tensor* X = Input(0);
+  Tensor* Y = Output(0);
+  Y->ResizeLike(X);
+
+  const float* Xdata = X-> data();
+  float* Ydata = Y->mutable_data();
+  for (int i = 0; i < X->size(); ++i) {
+    Ydata[i] = std::max(Xdata[i], 0.f);
+    VLOG(0) << i << ": " << Xdata[i] << " " << Ydata[i];
+  }
+
+  return true;
+}
+
 REGISTER_CPU_OPERATOR(Relu, ReluOp);
+REGISTER_NEON_OPERATOR(Relu, ReluOp);
 
 } // namespace mace
diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto
index 10c37f12..05c317d1 100644
--- a/mace/proto/mace.proto
+++ b/mace/proto/mace.proto
@@ -3,8 +3,9 @@ syntax = "proto2";
 package mace;
 
 enum DeviceType {
-  CPU = 0; // In default, we will use CPU.
-  GPU = 1;
+  CPU = 0; // By default, we use CPU.
+  NEON = 1;
+  OPENCL = 2;
 }
 
 enum DataType {
@@ -70,4 +71,4 @@ message NetDef {
   optional string version = 3;
   repeated Argument arg = 4;
   repeated TensorProto tensors = 5;
-}
\ No newline at end of file
+}
-- 
GitLab