diff --git a/mace/core/BUILD b/mace/core/BUILD index 41a3ec2d9e4e3b3286e26ff7d1ebfdaf22a24379..68af68fcf64d6f154537ff21ceb92168279f725c 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -52,10 +52,10 @@ cc_library( ]), deps = [ "//mace/utils", + "//mace/codegen:generated_version", ] + if_android([ ":opencl_headers", "//mace/codegen:generated_opencl", - "//mace/codegen:generated_version", "@half//:half", ]) + if_production_mode([ "//mace/codegen:generated_tuning_params", diff --git a/mace/core/operator.cc b/mace/core/operator.cc index 1aedbe709f157b9aea61ea8e2a9dbf5e0a87a611..29082361e30e1635e99bb0c20c939b4fbddc864f 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -79,7 +79,6 @@ extern void Register_AddN(OperatorRegistry *op_registry); extern void Register_BatchNorm(OperatorRegistry *op_registry); extern void Register_BatchToSpaceND(OperatorRegistry *op_registry); extern void Register_BiasAdd(OperatorRegistry *op_registry); -extern void Register_BufferToImage(OperatorRegistry *op_registry); extern void Register_ChannelShuffle(OperatorRegistry *op_registry); extern void Register_Concat(OperatorRegistry *op_registry); extern void Register_Conv2D(OperatorRegistry *op_registry); @@ -92,7 +91,6 @@ extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry); extern void Register_FullyConnected(OperatorRegistry *op_registry); extern void Register_FusedConv2D(OperatorRegistry *op_registry); extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry); -extern void Register_ImageToBuffer(OperatorRegistry *op_registry); extern void Register_LocalResponseNorm(OperatorRegistry *op_registry); extern void Register_MatMul(OperatorRegistry *op_registry); extern void Register_Pad(OperatorRegistry *op_registry); @@ -111,6 +109,11 @@ extern void Register_SpaceToDepth(OperatorRegistry *op_registry); extern void Register_Transpose(OperatorRegistry *op_registry); extern void Register_WinogradInverseTransform(OperatorRegistry *op_registry); extern void Register_WinogradTransform(OperatorRegistry *op_registry); + +#ifdef MACE_ENABLE_OPENCL +extern void Register_BufferToImage(OperatorRegistry *op_registry); +extern void Register_ImageToBuffer(OperatorRegistry *op_registry); +#endif // MACE_ENABLE_OPENCL } // namespace ops OperatorRegistry::OperatorRegistry() { @@ -120,7 +123,6 @@ OperatorRegistry::OperatorRegistry() { ops::Register_BatchNorm(this); ops::Register_BatchToSpaceND(this); ops::Register_BiasAdd(this); - ops::Register_BufferToImage(this); ops::Register_ChannelShuffle(this); ops::Register_Concat(this); ops::Register_Conv2D(this); @@ -133,7 +135,6 @@ OperatorRegistry::OperatorRegistry() { ops::Register_FullyConnected(this); ops::Register_FusedConv2D(this); ops::Register_GlobalAvgPooling(this); - ops::Register_ImageToBuffer(this); ops::Register_LocalResponseNorm(this); ops::Register_MatMul(this); ops::Register_Pad(this); @@ -152,6 +153,11 @@ OperatorRegistry::OperatorRegistry() { ops::Register_Transpose(this); ops::Register_WinogradInverseTransform(this); ops::Register_WinogradTransform(this); + +#ifdef MACE_ENABLE_OPENCL + ops::Register_BufferToImage(this); + ops::Register_ImageToBuffer(this); +#endif // MACE_ENABLE_OPENCL } } // namespace mace diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index f55115d6b36a22b90a0fdc3db1f8431a15642345..39c7c12547fdfec4c5fb28ab6c238cc527d49e59 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -86,7 +87,7 @@ void SetThreadAffinity(cpu_set_t mask) { #if defined(__ANDROID__) pid_t pid = gettid(); #else - pid_t pid = pthread_self(); + pid_t pid = syscall(SYS_gettid); #endif int err = sched_setaffinity(pid, sizeof(mask), &mask); MACE_CHECK(err == 0, "set affinity error: ", errno); diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index 50ab5c954efdb6fb2b61dd62492b9a614b8a6fc4..c7f144b02ddb395097e73ae5436348872b65c032 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -14,20 +14,30 @@ cc_library( srcs = glob( [ "*.cc", - "opencl/*.cc", "arm/*.cc", ], exclude = [ "*_test.cc", "arm/*_test.cc", + ], + ) + if_android(glob([ + "opencl/*.cc", + ], + exclude = [ "opencl/*_test.cc", + ])), + hdrs = glob( + [ + "*.h", + "arm/*.h", ], - ), - hdrs = glob([ - "*.h", - "opencl/*.h", - "arm/*.h", - ]), + exclude = [ + "buffer_to_image.h", + ], + ) + if_android(glob([ + "opencl/*.h", + "buffer_to_image.h", + ])), copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index 8ed3ca755f1280ef37e2201e0b81f324c5be4a14..2b64fa64946a481ef59e589d38abd5ad0c2d3266 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -21,10 +21,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/core/types.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -166,6 +169,7 @@ class ActivationFunctor { float relux_max_limit_; }; +#ifdef MACE_ENABLE_OPENCL template class ActivationFunctor { public: @@ -186,6 +190,7 @@ class ActivationFunctor { std::string tuning_key_prefix_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 86389d7f0ea4cef253fb41220d8b2abee7029b56..14221e901cb896efbdc4ea3fa38e2eeb5fb5b1bb 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -23,9 +23,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -88,6 +91,7 @@ struct AddNFunctor { } }; +#ifdef MACE_ENABLE_OPENCL template struct AddNFunctor { void operator()(const std::vector &input_tensors, @@ -99,6 +103,7 @@ struct AddNFunctor { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index b2617b94ed7cf463168fcdb9780d7257ddd6acad..ff9e3f4b16f0cbdd2d8e735965cc4c7aea7300a8 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -22,11 +22,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/activation.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -159,7 +162,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { StatsFuture *future); }; - +#ifdef MACE_ENABLE_OPENCL template struct BatchNormFunctor : BatchNormFunctorBase { BatchNormFunctor(const bool folded_constant, @@ -179,6 +182,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index 88131b18e61baa1f135caf5cccbdcf29eb23352c..87bc0831befc846323136b862aef2aa2122b3c59 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -19,10 +19,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -68,6 +71,7 @@ void BiasAddFunctor::operator()( StatsFuture *future); */ +#ifdef MACE_ENABLE_OPENCL template struct BiasAddFunctor { void operator()(const Tensor *input, @@ -79,6 +83,7 @@ struct BiasAddFunctor { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index 8ef030ef8554906373f882d99e798c574820bd8e..51dbbca7832145ee055fb6408596d95dd7462b04 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -60,6 +60,7 @@ struct ChannelShuffleFunctor { const int groups_; }; +#ifdef MACE_ENABLE_OPENCL template struct ChannelShuffleFunctor { explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} @@ -72,6 +73,7 @@ struct ChannelShuffleFunctor { const int groups_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index c55bf4c7328356dff50f9e4416def6edb7fbe4a3..6ae7a769e55488b44f711f66e8c34ab3f3736c7e 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -19,11 +19,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/core/types.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -88,6 +91,7 @@ struct ConcatFunctor : ConcatFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct ConcatFunctor : ConcatFunctorBase { explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} @@ -100,6 +104,7 @@ struct ConcatFunctor : ConcatFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 63f200f9f5598e608ef0f37f62345a553b4dc0c7..c2a2d89784b48387d674961d2b912d97d4912c2d 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -23,12 +23,15 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/activation.h" #include "mace/kernels/conv_pool_2d_util.h" #include "mace/utils/utils.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -457,6 +460,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { ScratchBuffer *scratch_; }; +#ifdef MACE_ENABLE_OPENCL template struct Conv2dFunctor : Conv2dFunctorBase { Conv2dFunctor(const int *strides, @@ -485,6 +489,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/cwise.h b/mace/kernels/cwise.h index 997410a36829b86a95b21451f1846892a28c27b8..dde27da7235b76b9187382f79b22224e6bfd688c 100644 --- a/mace/kernels/cwise.h +++ b/mace/kernels/cwise.h @@ -20,9 +20,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -116,6 +119,7 @@ struct CWiseFunctor : CWiseFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct CWiseFunctor : CWiseFunctorBase { CWiseFunctor(const CWiseType type, const float coeff) @@ -130,6 +134,7 @@ struct CWiseFunctor : CWiseFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/depth_to_space.h b/mace/kernels/depth_to_space.h index 3ea7f8abf7886a939e5b616152a1b0a5c8fd29ca..41a282f6ca04dc04c577da513d5f4ab3f2519ba0 100644 --- a/mace/kernels/depth_to_space.h +++ b/mace/kernels/depth_to_space.h @@ -18,10 +18,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -112,6 +115,7 @@ struct DepthToSpaceOpFunctor { bool d2s_; }; +#ifdef MACE_ENABLE_OPENCL template struct DepthToSpaceOpFunctor { DepthToSpaceOpFunctor(const int block_size, bool d2s) @@ -125,6 +129,7 @@ struct DepthToSpaceOpFunctor { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index e560be8c3e89978af510d2010e632ba8c3d58f84..5c90711fa000576e254f42ee31cfb1d75c997e57 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -23,11 +23,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/activation.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -441,6 +444,7 @@ struct DepthwiseConv2dFunctor StatsFuture *future); }; +#ifdef MACE_ENABLE_OPENCL template struct DepthwiseConv2dFunctor : DepthwiseConv2dFunctorBase { @@ -468,6 +472,7 @@ struct DepthwiseConv2dFunctor std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 94c37cdb4ae1cedde48a6504331ecead3c01caee..aa1a3e21c0be4a202f3fe1954b1a4bca1bb3b118 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -20,9 +20,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -105,6 +108,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct EltwiseFunctor : EltwiseFunctorBase { EltwiseFunctor(const EltwiseType type, const std::vector &coeff) @@ -120,6 +124,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index bf52bd33c16659a4dd4e88c3770b4eb3027d896f..32395a32386f6365bc909cbf2df4661b3ea56f4d 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -19,16 +19,19 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/activation.h" + +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/kernels/opencl/helper.h" +#endif // MACE_ENABLE_OPENCL namespace mace { namespace kernels { struct FullyConnectedBase { - FullyConnectedBase(const BufferType weight_type, + FullyConnectedBase(const int /*BufferType*/ weight_type, const ActivationType activation, const float relux_max_limit) : weight_type_(weight_type), @@ -42,7 +45,7 @@ struct FullyConnectedBase { template struct FullyConnectedFunctor : FullyConnectedBase { - FullyConnectedFunctor(const BufferType weight_type, + FullyConnectedFunctor(const int /*BufferType*/ weight_type, const ActivationType activation, const float relux_max_limit) : FullyConnectedBase(weight_type, activation, relux_max_limit) {} @@ -89,7 +92,7 @@ struct FullyConnectedFunctor : FullyConnectedBase { template <> struct FullyConnectedFunctor : FullyConnectedBase { - FullyConnectedFunctor(const BufferType weight_type, + FullyConnectedFunctor(const int /*BufferType*/ weight_type, const ActivationType activation, const float relux_max_limit) : FullyConnectedBase(weight_type, activation, relux_max_limit) {} @@ -101,9 +104,10 @@ struct FullyConnectedFunctor : FullyConnectedBase { StatsFuture *future); }; +#ifdef MACE_ENABLE_OPENCL template struct FullyConnectedFunctor : FullyConnectedBase { - FullyConnectedFunctor(const BufferType weight_type, + FullyConnectedFunctor(const int /*BufferType*/ weight_type, const ActivationType activation, const float relux_max_limit) : FullyConnectedBase(weight_type, activation, relux_max_limit) {} @@ -120,6 +124,7 @@ struct FullyConnectedFunctor : FullyConnectedBase { std::vector input_shape_; std::unique_ptr kernel_error_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/gemm.cc b/mace/kernels/gemm.cc index b252949af135a1238f926c20174f72268530567b..2668c69a6d1c3bf899fce51cb9cbdb0d5d3fdeef 100644 --- a/mace/kernels/gemm.cc +++ b/mace/kernels/gemm.cc @@ -14,6 +14,7 @@ #include #include +#include #if defined(MACE_ENABLE_NEON) #include diff --git a/mace/kernels/local_response_norm.h b/mace/kernels/local_response_norm.h index c0b638ac85652bef28e416e116f08bcc7279dfab..a9c8b5cce1d5588a78e202159d8fff48c9fc480c 100644 --- a/mace/kernels/local_response_norm.h +++ b/mace/kernels/local_response_norm.h @@ -10,10 +10,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index c12aac56335862a7e88047b4857a45237687ae71..f572f63d62ec456a828a9d8ab2cf00cd9a48d15b 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -25,11 +25,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/gemm.h" #include "mace/utils/utils.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -70,6 +73,7 @@ struct MatMulFunctor { } }; +#ifdef MACE_ENABLE_OPENCL template struct MatMulFunctor { void operator()(const Tensor *A, @@ -81,6 +85,7 @@ struct MatMulFunctor { uint32_t kwg_size_; std::unique_ptr kernel_error_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/pad.h b/mace/kernels/pad.h index b1977202a13fd9395c21a202fa541154c0f23ca1..62728e5b1940bdc0c65ac707914eb914aeae5612 100644 --- a/mace/kernels/pad.h +++ b/mace/kernels/pad.h @@ -20,9 +20,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -79,6 +82,7 @@ struct PadFunctor : public PadFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct PadFunctor : PadFunctorBase { PadFunctor(const std::vector &paddings, @@ -94,6 +98,7 @@ struct PadFunctor : PadFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index ff39e0ff70dbd3e79c28822f562e15e711ba9d6a..65d9c4092f3ca5f3fb893bee71104a8596f1989f 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -21,10 +21,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/conv_pool_2d_util.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { enum PoolingType { @@ -192,6 +195,7 @@ struct PoolingFunctor : PoolingFunctorBase { StatsFuture *future); }; +#ifdef MACE_ENABLE_OPENCL template struct PoolingFunctor : PoolingFunctorBase { PoolingFunctor(const PoolingType pooling_type, @@ -212,6 +216,7 @@ struct PoolingFunctor : PoolingFunctorBase { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/reorganize.h b/mace/kernels/reorganize.h index 06cef0e3a0adf1f80b2b4eb872ded4657e3bbbc1..e987d3e15f494cee561e22a37e24a62c0bb88092 100644 --- a/mace/kernels/reorganize.h +++ b/mace/kernels/reorganize.h @@ -18,9 +18,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { diff --git a/mace/kernels/reshape.h b/mace/kernels/reshape.h index fc25f96e08abd0984c18da74d331df22ce1a0dc2..2405447accb388603205c94b9a9e36a21e8dc8da 100644 --- a/mace/kernels/reshape.h +++ b/mace/kernels/reshape.h @@ -18,9 +18,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 9b22b7b7a71e3367c70a5ee2414708cc540f1ab8..9b450ad8abe972ace2cc11c147c7e94790266784 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -20,9 +20,12 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -177,6 +180,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { @@ -190,6 +194,7 @@ struct ResizeBilinearFunctor std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h index ea08e7f43a40df710eb614438be17331942ae7c5..32eddad47af21ab06f2f26eb61583ace6844b914 100644 --- a/mace/kernels/slice.h +++ b/mace/kernels/slice.h @@ -20,11 +20,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/core/types.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -81,6 +84,7 @@ struct SliceFunctor : SliceFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct SliceFunctor : SliceFunctorBase { explicit SliceFunctor(const int32_t axis) : SliceFunctorBase(axis) {} @@ -92,6 +96,7 @@ struct SliceFunctor : SliceFunctorBase { uint32_t kwg_size_; std::unique_ptr kernel_error_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/softmax.h b/mace/kernels/softmax.h index 33e11c6591c915c3a9c70eee18c4ae76479be343..56b141d73393eab258aa830e414f00db94225923 100644 --- a/mace/kernels/softmax.h +++ b/mace/kernels/softmax.h @@ -21,11 +21,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" #include "mace/utils/utils.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -72,6 +75,7 @@ struct SoftmaxFunctor { void operator()(const Tensor *logits, Tensor *output, StatsFuture *future); }; +#ifdef MACE_ENABLE_OPENCL template struct SoftmaxFunctor { void operator()(const Tensor *logits, Tensor *output, StatsFuture *future); @@ -81,6 +85,7 @@ struct SoftmaxFunctor { std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index 46c472c8a4f2ed91e7a866d47294177c97e1a31a..52119a45ca8ffc00831deffaacb7ea47635379f8 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -19,10 +19,13 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -54,6 +57,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { SpaceToBatchFunctor(const std::vector &paddings, @@ -71,6 +75,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { std::unique_ptr kernel_error_; std::vector space_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h index 538c62bc51e6eabcd3a3d055ff9a7bdece0c36ed..932604bc63cc786aa5e44096c74fff1b22d61c4c 100644 --- a/mace/kernels/winograd_transform.h +++ b/mace/kernels/winograd_transform.h @@ -19,11 +19,14 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" #include "mace/kernels/activation.h" #include "mace/kernels/conv_pool_2d_util.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif // MACE_ENABLE_OPENCL + namespace mace { namespace kernels { @@ -52,6 +55,7 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct WinogradTransformFunctor : WinogradTransformFunctorBase { @@ -66,6 +70,7 @@ struct WinogradTransformFunctor std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL struct WinogradInverseTransformFunctorBase { WinogradInverseTransformFunctorBase(const int batch, @@ -104,6 +109,7 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { } }; +#ifdef MACE_ENABLE_OPENCL template struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { @@ -125,6 +131,7 @@ struct WinogradInverseTransformFunctor std::unique_ptr kernel_error_; std::vector input_shape_; }; +#endif // MACE_ENABLE_OPENCL } // namespace kernels } // namespace mace diff --git a/mace/ops/BUILD b/mace/ops/BUILD index ba39f5af4e2caef921ea49184b9880abf1a14715..a03b9d178fa8c7543003f70d60ee236fc0eb663e 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -28,8 +28,14 @@ cc_library( exclude = [ "*_test.cc", "*_benchmark.cc", + "buffer_to_image.cc", + "image_to_buffer.cc", ], - ), + ) + if_android( + [ + "buffer_to_image.cc", + "image_to_buffer.cc", + ]), hdrs = glob( ["*.h"], exclude = ["ops_test_util.h"], diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index 9beb3f8092c66da948de53980a893d1b4a75e747..56f936c3bf044b332be70b3a9891e49a05bf169c 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -24,6 +24,7 @@ void Register_Activation(OperatorRegistry *op_registry) { .Build(), ActivationOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_Activation(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), ActivationOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index db6cb26a8dc10d624e8432355a2970be57bda43b..4c78b3968edb572df5e9d4f70f8f3f2b806d02bd 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -24,6 +24,7 @@ void Register_AddN(OperatorRegistry *op_registry) { .Build(), AddNOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_AddN(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), AddNOp); +#endif // MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") .Device(DeviceType::NEON) diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 255b31e96468d2ea4a7e927e35d1a5e58b873cbf..7472c629d343172bef1a69f9c043dc89a5f0355b 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -24,6 +24,7 @@ void Register_BatchNorm(OperatorRegistry *op_registry) { .Build(), BatchNormOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_BatchNorm(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), BatchNormOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 574eca53e0a7573c4fd61a5966a402a28710ea90..7966657731af26002aee48daa1b0788cbeba82b5 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -18,6 +18,7 @@ namespace mace { namespace ops { void Register_BatchToSpaceND(OperatorRegistry *op_registry) { +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -28,6 +29,7 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), BatchToSpaceNDOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index dbbcc9b329fcb20c9d4bb7a4ff103cad9952ba00..b2d217dceba554056a69afb83774f4a1d3ec88e6 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -24,6 +24,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) { .Build(), BiasAddOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), BiasAddOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 83ef3fe67298ae1573dc1b7f61a13af1cda994f6..980a81d5ee0ae79157b8b5dffd548f008acbaeba 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -23,16 +23,20 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), ChannelShuffleOp); + +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), ChannelShuffleOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), ChannelShuffleOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 5e8a3bcffcee02688f57e1bcb90e5555a9b5a21b..3599c0f4bbba4a3c35aaa946915eb4a72cc7fe38 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -23,16 +23,21 @@ void Register_Concat(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), ConcatOp); + +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), ConcatOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), ConcatOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index 081019b52e8794328d33c7a6823c4fc5ffeb7429..a89efddd4e0b1d93ff6f178870ac5439c20b6f42 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -24,6 +24,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) { .Build(), Conv2dOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), Conv2dOp); +#endif // MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") .Device(DeviceType::NEON) diff --git a/mace/ops/cwise.cc b/mace/ops/cwise.cc index 4f98b63dc4c161f4f807c7d1149c3f7bd7410722..fa975875d7322489edb3524790b94a5867d740f0 100644 --- a/mace/ops/cwise.cc +++ b/mace/ops/cwise.cc @@ -24,6 +24,7 @@ void Register_CWise(OperatorRegistry *op_registry) { .Build(), CWiseOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("CWise") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_CWise(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), CWiseOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index e5299efb5765430cf6e15aeb9533926d2ed3c9c1..f98b06ce4c29dbc330b7214eddbc61841cd7472c 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -24,6 +24,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) { .Build(), DepthToSpaceOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), DepthToSpaceOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 71ee85b445d52cc93b69b487a496fd208e3f50f8..9e16c7fba9dec4f47021b5ff3e83ac2af35ae871 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -24,6 +24,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { .Build(), DepthwiseConv2dOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), DepthwiseConv2dOp); +#endif // MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") .Device(DeviceType::NEON) diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index b3ad92feb494361be870dd0bbd435e047fa736bb..520e124816ebcc746f619edc531cf8042806775c 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -24,6 +24,7 @@ void Register_Eltwise(OperatorRegistry *op_registry) { .Build(), EltwiseOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_Eltwise(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), EltwiseOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/folded_batch_norm.cc b/mace/ops/folded_batch_norm.cc index f75554489aa194629b7c6a6cedf52961641408f9..afb63c4ee66345c31d44b95610bef0b91964c604 100644 --- a/mace/ops/folded_batch_norm.cc +++ b/mace/ops/folded_batch_norm.cc @@ -24,6 +24,7 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { .Build(), FoldedBatchNormOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), FoldedBatchNormOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index 8f8df09b22725847bd72b6bf9df64b67f7336451..b88df0d745d21476b93c3a032d4c0dc4d7274b1b 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -24,6 +24,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) { .Build(), FullyConnectedOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), FullyConnectedOp); +#endif // MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC") .Device(DeviceType::NEON) diff --git a/mace/ops/fully_connected.h b/mace/ops/fully_connected.h index a14a5ecc54a36c52377d8c46151f441a06f2535f..e7882d04cda08259c40cee984fe73d60e94aa5ce 100644 --- a/mace/ops/fully_connected.h +++ b/mace/ops/fully_connected.h @@ -28,10 +28,9 @@ class FullyConnectedOp : public Operator { public: FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), - functor_(static_cast( - OperatorBase::GetSingleArgument( - "weight_type", static_cast( - kernels::WEIGHT_WIDTH))), + functor_(OperatorBase::GetSingleArgument( + "weight_type", + 7 /*static_cast(kernels::WEIGHT_WIDTH)*/), kernels::StringToActivationType( OperatorBase::GetSingleArgument("activation", "NOOP")), diff --git a/mace/ops/fused_conv_2d.cc b/mace/ops/fused_conv_2d.cc index a1d4f32cebbae18f48be077c2973231a7e55a440..64972e9c361db3978ec849de5abe2e90dd8c5e27 100644 --- a/mace/ops/fused_conv_2d.cc +++ b/mace/ops/fused_conv_2d.cc @@ -24,6 +24,7 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) { .Build(), FusedConv2dOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), FusedConv2dOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index 5c22263e30e6e4433d9ee37a1f6ef20367108990..2b8169499ae5bf30871f3265ccaee8e2d5b06d0d 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -24,6 +24,7 @@ void Register_MatMul(OperatorRegistry *op_registry) { .Build(), MatMulOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("MatMul") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_MatMul(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), MatMulOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index 0d61b6c2009303a045ab26b1c23a2fecdc122580..5e0cd9f76af025794155e3abee4428202a050296 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -24,6 +24,7 @@ void Register_Pad(OperatorRegistry *op_registry) { .Build(), PadOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pad") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -34,6 +35,7 @@ void Register_Pad(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), PadOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 086baba6df368258616e287adacbdbc07a58d368..4755841f673fdf98579f4c8ce2117443b0b71e34 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -23,22 +23,21 @@ void Register_Pooling(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), PoolingOp); - REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), - PoolingOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), PoolingOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 4d6296087b044443235c5e9fee358ec624dfe602..9368bfecb420e5676f58291494ce4da72c4cbc74 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -24,6 +24,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) { .Build(), ResizeBilinearOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), ResizeBilinearOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/slice.cc b/mace/ops/slice.cc index 15b703826e7251907fb4af8d541034894059897b..a14a02de5dc5e399dd000ed3b8acc390ae5894f9 100644 --- a/mace/ops/slice.cc +++ b/mace/ops/slice.cc @@ -24,16 +24,20 @@ void Register_Slice(OperatorRegistry *op_registry) { .Build(), SliceOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), SliceOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), SliceOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index aab15bb01dbf2ebf70ae39d017029569671687c7..6ffe85804773522826f862e51c6834439e0a5f32 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -24,6 +24,7 @@ void Register_Softmax(OperatorRegistry *op_registry) { .Build(), SoftmaxOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,8 @@ void Register_Softmax(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), SoftmaxOp); +#endif // MACE_ENABLE_OPENCL + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") .Device(DeviceType::NEON) .TypeConstraint("T") diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 44d747ec73538fcf6c0c2559f5927ec6c59a9a30..704203b2341b8c18eaa61420185105489773a0de 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -18,16 +18,19 @@ namespace mace { namespace ops { void Register_SpaceToBatchND(OperatorRegistry *op_registry) { +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), SpaceToBatchNDOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), SpaceToBatchNDOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index 17fbc29d5f4ec021b07b8e8882f68a0208b9f02f..8ffd17b86e184e4b73157474e44ffd8d4dc129c4 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -24,6 +24,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) { .Build(), SpaceToDepthOp); +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToDepth") .Device(DeviceType::OPENCL) .TypeConstraint("T") @@ -35,6 +36,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) { .TypeConstraint("T") .Build(), SpaceToDepthOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/winograd_inverse_transform.cc b/mace/ops/winograd_inverse_transform.cc index 946896b0b4ee091490fdef16fa1194d3bac493d7..3f33c6c6eb1dd7bbd2c1a24224ca11fd22b5aec7 100644 --- a/mace/ops/winograd_inverse_transform.cc +++ b/mace/ops/winograd_inverse_transform.cc @@ -18,16 +18,19 @@ namespace mace { namespace ops { void Register_WinogradInverseTransform(OperatorRegistry *op_registry) { +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), WinogradInverseTransformOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), WinogradInverseTransformOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/winograd_transform.cc b/mace/ops/winograd_transform.cc index 2dbd734e372213b34b67a47fa13a24a17f9c403b..0be6fee6d1408d897094b3ecdef049f7d4f3862c 100644 --- a/mace/ops/winograd_transform.cc +++ b/mace/ops/winograd_transform.cc @@ -18,16 +18,19 @@ namespace mace { namespace ops { void Register_WinogradTransform(OperatorRegistry *op_registry) { +#ifdef MACE_ENABLE_OPENCL REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), WinogradTransformOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform") .Device(DeviceType::OPENCL) .TypeConstraint("T") .Build(), WinogradTransformOp); +#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/tools/validation/BUILD b/mace/tools/validation/BUILD index a4e1cb876dd508b9a1a365c2350345301b9a3af4..636937d569c3ed2c8e213ccc5610ec85850618df 100644 --- a/mace/tools/validation/BUILD +++ b/mace/tools/validation/BUILD @@ -1,11 +1,12 @@ # Examples -load("//mace:mace.bzl", "if_openmp_enabled") +load("//mace:mace.bzl", "if_openmp_enabled", "if_android") cc_binary( name = "mace_run", srcs = ["mace_run.cc"], linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, + copts = if_android(["-DMACE_ENABLE_OPENCL"]), deps = [ "//external:gflags_nothreads", "//mace/codegen:generated_models", diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 838320f331663bd65618f43beae4a18ab0ae418a..c0d984897938225eb307044f6ea729a58d761861 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -33,12 +33,15 @@ #include #include "gflags/gflags.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_runtime.h" +#endif // MACE_ENABLE_OPENCL + // #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead namespace mace { namespace MACE_MODEL_TAG { @@ -113,6 +116,7 @@ DeviceType ParseDeviceType(const std::string &device_str) { } } +#ifdef MACE_ENABLE_OPENCL void WriteOpenCLPlatformInfo(const std::string &output_dir) { std::string platform_info = OpenCLRuntime::Global()->platform_info(); const std::string cl_platform_info_file_name = output_dir @@ -126,6 +130,7 @@ void WriteOpenCLPlatformInfo(const std::string &output_dir) { LOG(WARNING) << "Write opencl platform info failed."; } } +#endif // MACE_ENABLE_OPENCL struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo curr = mallinfo(); @@ -225,11 +230,13 @@ bool RunModel(const std::vector &input_names, mace::SetOpenMPThreadPolicy( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_affinity_policy)); +#ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::OPENCL) { mace::SetGPUHints( static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } +#endif // MACE_ENABLE_OPENCL const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH"); const std::string kernel_file_path = @@ -318,9 +325,11 @@ bool RunModel(const std::vector &input_names, printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis, mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis); +#ifdef MACE_ENABLE_OPENCL if (device_type == DeviceType::OPENCL) { WriteOpenCLPlatformInfo(kernel_file_path); } +#endif // MACE_ENABLE_OPENCL for (size_t i = 0; i < output_count; ++i) { std::string output_name = diff --git a/tools/mace_tools.py b/tools/mace_tools.py index f475374e35103fa0c37d4dd22a0bfb5df66bd5b2..f25236f9881f7949211adbb8a55cbba67dba5f2f 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -34,26 +34,30 @@ from ConfigParser import ConfigParser def get_target_socs(configs): - available_socs = sh_commands.adb_get_all_socs() - target_socs = available_socs - if hasattr(configs, "target_socs"): - target_socs = set(configs["target_socs"]) - target_socs = target_socs & available_socs - - if FLAGS.target_socs != "all": - socs = set(FLAGS.target_socs.split(',')) - target_socs = target_socs & socs - missing_socs = socs.difference(target_socs) - if len(missing_socs) > 0: - print( - "Error: devices with SoCs are not connected %s" % missing_socs) + if "host" in configs["target_abis"]: + return [""] + else: + available_socs = sh_commands.adb_get_all_socs() + target_socs = available_socs + if hasattr(configs, "target_socs"): + target_socs = set(configs["target_socs"]) + target_socs = target_socs & available_socs + + if FLAGS.target_socs != "all": + socs = set(FLAGS.target_socs.split(',')) + target_socs = target_socs & socs + missing_socs = socs.difference(target_socs) + if len(missing_socs) > 0: + print( + "Error: devices with SoCs are not connected %s" % + missing_socs) + exit(1) + + if not target_socs: + print("Error: no device to run") exit(1) - if not target_socs: - print("Error: no device to run") - exit(1) - - return target_socs + return target_socs def get_data_and_device_type(runtime): @@ -339,8 +343,8 @@ def parse_args(): def process_models(project_name, configs, embed_model_data, vlog_level, - target_soc, target_abi, serialno, phone_data_dir, - option_args): + target_abi, phone_data_dir, option_args, + target_soc="", serialno="", device_name=""): hexagon_mode = get_hexagon_mode(configs) model_output_dirs = [] for model_name in configs["models"]: @@ -358,11 +362,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level, # Create model build directory model_path_digest = md5sum(model_config["model_file_path"]) - device_name = sh_commands.adb_get_device_name_by_serialno(serialno) - model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest, device_name.replace(' ', ''), - target_soc, target_abi) + + if target_abi == "host": + model_output_dir = "%s/%s/%s/%s/%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest, target_abi) + else: + model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( + FLAGS.output_dir, project_name, "build", + model_name, model_path_digest, device_name.replace(' ', ''), + target_soc, target_abi) model_output_dirs.append(model_output_dir) if FLAGS.mode == "build" or FLAGS.mode == "all": @@ -563,15 +572,19 @@ def main(unused_args): print("Run on device: %s, %s, %s" % ( serialno, props["ro.board.platform"], props["ro.product.model"])) + device_name = \ + sh_commands.adb_get_device_name_by_serialno( + serialno) process_models(project_name, configs, embed_model_data, - vlog_level, target_soc, target_abi, - serialno, phone_data_dir, option_args) + vlog_level, target_abi, phone_data_dir, + option_args, target_soc, serialno, + device_name) else: print("====================================================") print("Run on host") process_models(project_name, configs, embed_model_data, - vlog_level, target_soc, target_abi, '', - phone_data_dir, option_args) + vlog_level, target_abi, phone_data_dir, + option_args) if FLAGS.mode == "build" or FLAGS.mode == "all": sh_commands.packaging_lib(FLAGS.output_dir, project_name)