提交 3f418e2d 编写于 作者: Y yejianwu

Fix broken host run

上级 7c1711d8
...@@ -52,10 +52,10 @@ cc_library( ...@@ -52,10 +52,10 @@ cc_library(
]), ]),
deps = [ deps = [
"//mace/utils", "//mace/utils",
"//mace/codegen:generated_version",
] + if_android([ ] + if_android([
":opencl_headers", ":opencl_headers",
"//mace/codegen:generated_opencl", "//mace/codegen:generated_opencl",
"//mace/codegen:generated_version",
"@half//:half", "@half//:half",
]) + if_production_mode([ ]) + if_production_mode([
"//mace/codegen:generated_tuning_params", "//mace/codegen:generated_tuning_params",
......
...@@ -79,7 +79,6 @@ extern void Register_AddN(OperatorRegistry *op_registry); ...@@ -79,7 +79,6 @@ extern void Register_AddN(OperatorRegistry *op_registry);
extern void Register_BatchNorm(OperatorRegistry *op_registry); extern void Register_BatchNorm(OperatorRegistry *op_registry);
extern void Register_BatchToSpaceND(OperatorRegistry *op_registry); extern void Register_BatchToSpaceND(OperatorRegistry *op_registry);
extern void Register_BiasAdd(OperatorRegistry *op_registry); extern void Register_BiasAdd(OperatorRegistry *op_registry);
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ChannelShuffle(OperatorRegistry *op_registry); extern void Register_ChannelShuffle(OperatorRegistry *op_registry);
extern void Register_Concat(OperatorRegistry *op_registry); extern void Register_Concat(OperatorRegistry *op_registry);
extern void Register_Conv2D(OperatorRegistry *op_registry); extern void Register_Conv2D(OperatorRegistry *op_registry);
...@@ -92,7 +91,6 @@ extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry); ...@@ -92,7 +91,6 @@ extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry);
extern void Register_FullyConnected(OperatorRegistry *op_registry); extern void Register_FullyConnected(OperatorRegistry *op_registry);
extern void Register_FusedConv2D(OperatorRegistry *op_registry); extern void Register_FusedConv2D(OperatorRegistry *op_registry);
extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry); extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
extern void Register_LocalResponseNorm(OperatorRegistry *op_registry); extern void Register_LocalResponseNorm(OperatorRegistry *op_registry);
extern void Register_MatMul(OperatorRegistry *op_registry); extern void Register_MatMul(OperatorRegistry *op_registry);
extern void Register_Pad(OperatorRegistry *op_registry); extern void Register_Pad(OperatorRegistry *op_registry);
...@@ -111,6 +109,11 @@ extern void Register_SpaceToDepth(OperatorRegistry *op_registry); ...@@ -111,6 +109,11 @@ extern void Register_SpaceToDepth(OperatorRegistry *op_registry);
extern void Register_Transpose(OperatorRegistry *op_registry); extern void Register_Transpose(OperatorRegistry *op_registry);
extern void Register_WinogradInverseTransform(OperatorRegistry *op_registry); extern void Register_WinogradInverseTransform(OperatorRegistry *op_registry);
extern void Register_WinogradTransform(OperatorRegistry *op_registry); extern void Register_WinogradTransform(OperatorRegistry *op_registry);
#ifdef MACE_ENABLE_OPENCL
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
#endif // MACE_ENABLE_OPENCL
} // namespace ops } // namespace ops
OperatorRegistry::OperatorRegistry() { OperatorRegistry::OperatorRegistry() {
...@@ -120,7 +123,6 @@ OperatorRegistry::OperatorRegistry() { ...@@ -120,7 +123,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_BatchNorm(this); ops::Register_BatchNorm(this);
ops::Register_BatchToSpaceND(this); ops::Register_BatchToSpaceND(this);
ops::Register_BiasAdd(this); ops::Register_BiasAdd(this);
ops::Register_BufferToImage(this);
ops::Register_ChannelShuffle(this); ops::Register_ChannelShuffle(this);
ops::Register_Concat(this); ops::Register_Concat(this);
ops::Register_Conv2D(this); ops::Register_Conv2D(this);
...@@ -133,7 +135,6 @@ OperatorRegistry::OperatorRegistry() { ...@@ -133,7 +135,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_FullyConnected(this); ops::Register_FullyConnected(this);
ops::Register_FusedConv2D(this); ops::Register_FusedConv2D(this);
ops::Register_GlobalAvgPooling(this); ops::Register_GlobalAvgPooling(this);
ops::Register_ImageToBuffer(this);
ops::Register_LocalResponseNorm(this); ops::Register_LocalResponseNorm(this);
ops::Register_MatMul(this); ops::Register_MatMul(this);
ops::Register_Pad(this); ops::Register_Pad(this);
...@@ -152,6 +153,11 @@ OperatorRegistry::OperatorRegistry() { ...@@ -152,6 +153,11 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_Transpose(this); ops::Register_Transpose(this);
ops::Register_WinogradInverseTransform(this); ops::Register_WinogradInverseTransform(this);
ops::Register_WinogradTransform(this); ops::Register_WinogradTransform(this);
#ifdef MACE_ENABLE_OPENCL
ops::Register_BufferToImage(this);
ops::Register_ImageToBuffer(this);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace mace } // namespace mace
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <omp.h> #include <omp.h>
#include <unistd.h> #include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h> #include <sys/types.h>
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
...@@ -86,7 +87,7 @@ void SetThreadAffinity(cpu_set_t mask) { ...@@ -86,7 +87,7 @@ void SetThreadAffinity(cpu_set_t mask) {
#if defined(__ANDROID__) #if defined(__ANDROID__)
pid_t pid = gettid(); pid_t pid = gettid();
#else #else
pid_t pid = pthread_self(); pid_t pid = syscall(SYS_gettid);
#endif #endif
int err = sched_setaffinity(pid, sizeof(mask), &mask); int err = sched_setaffinity(pid, sizeof(mask), &mask);
MACE_CHECK(err == 0, "set affinity error: ", errno); MACE_CHECK(err == 0, "set affinity error: ", errno);
......
...@@ -14,20 +14,30 @@ cc_library( ...@@ -14,20 +14,30 @@ cc_library(
srcs = glob( srcs = glob(
[ [
"*.cc", "*.cc",
"opencl/*.cc",
"arm/*.cc", "arm/*.cc",
], ],
exclude = [ exclude = [
"*_test.cc", "*_test.cc",
"arm/*_test.cc", "arm/*_test.cc",
],
) + if_android(glob([
"opencl/*.cc",
],
exclude = [
"opencl/*_test.cc", "opencl/*_test.cc",
])),
hdrs = glob(
[
"*.h",
"arm/*.h",
], ],
), exclude = [
hdrs = glob([ "buffer_to_image.h",
"*.h", ],
"opencl/*.h", ) + if_android(glob([
"arm/*.h", "opencl/*.h",
]), "buffer_to_image.h",
])),
copts = if_openmp_enabled(["-fopenmp"]) + copts = if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
......
...@@ -21,10 +21,13 @@ ...@@ -21,10 +21,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -166,6 +169,7 @@ class ActivationFunctor<DeviceType::NEON, float> { ...@@ -166,6 +169,7 @@ class ActivationFunctor<DeviceType::NEON, float> {
float relux_max_limit_; float relux_max_limit_;
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
class ActivationFunctor<DeviceType::OPENCL, T> { class ActivationFunctor<DeviceType::OPENCL, T> {
public: public:
...@@ -186,6 +190,7 @@ class ActivationFunctor<DeviceType::OPENCL, T> { ...@@ -186,6 +190,7 @@ class ActivationFunctor<DeviceType::OPENCL, T> {
std::string tuning_key_prefix_; std::string tuning_key_prefix_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -23,9 +23,12 @@ ...@@ -23,9 +23,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -88,6 +91,7 @@ struct AddNFunctor { ...@@ -88,6 +91,7 @@ struct AddNFunctor {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct AddNFunctor<DeviceType::OPENCL, T> { struct AddNFunctor<DeviceType::OPENCL, T> {
void operator()(const std::vector<const Tensor *> &input_tensors, void operator()(const std::vector<const Tensor *> &input_tensors,
...@@ -99,6 +103,7 @@ struct AddNFunctor<DeviceType::OPENCL, T> { ...@@ -99,6 +103,7 @@ struct AddNFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -22,11 +22,14 @@ ...@@ -22,11 +22,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/kernels/activation.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -159,7 +162,7 @@ struct BatchNormFunctor<DeviceType::NEON, float> : BatchNormFunctorBase { ...@@ -159,7 +162,7 @@ struct BatchNormFunctor<DeviceType::NEON, float> : BatchNormFunctorBase {
StatsFuture *future); StatsFuture *future);
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase { struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
BatchNormFunctor(const bool folded_constant, BatchNormFunctor(const bool folded_constant,
...@@ -179,6 +182,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase { ...@@ -179,6 +182,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -19,10 +19,13 @@ ...@@ -19,10 +19,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -68,6 +71,7 @@ void BiasAddFunctor<DeviceType::NEON, float>::operator()( ...@@ -68,6 +71,7 @@ void BiasAddFunctor<DeviceType::NEON, float>::operator()(
StatsFuture *future); StatsFuture *future);
*/ */
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct BiasAddFunctor<DeviceType::OPENCL, T> { struct BiasAddFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *input, void operator()(const Tensor *input,
...@@ -79,6 +83,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> { ...@@ -79,6 +83,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -60,6 +60,7 @@ struct ChannelShuffleFunctor { ...@@ -60,6 +60,7 @@ struct ChannelShuffleFunctor {
const int groups_; const int groups_;
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct ChannelShuffleFunctor<DeviceType::OPENCL, T> { struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {}
...@@ -72,6 +73,7 @@ struct ChannelShuffleFunctor<DeviceType::OPENCL, T> { ...@@ -72,6 +73,7 @@ struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
const int groups_; const int groups_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -19,11 +19,14 @@ ...@@ -19,11 +19,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -88,6 +91,7 @@ struct ConcatFunctor : ConcatFunctorBase { ...@@ -88,6 +91,7 @@ struct ConcatFunctor : ConcatFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase { struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {}
...@@ -100,6 +104,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase { ...@@ -100,6 +104,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -23,12 +23,15 @@ ...@@ -23,12 +23,15 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/kernels/activation.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/conv_pool_2d_util.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -457,6 +460,7 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase { ...@@ -457,6 +460,7 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase {
ScratchBuffer *scratch_; ScratchBuffer *scratch_;
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase { struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
Conv2dFunctor(const int *strides, Conv2dFunctor(const int *strides,
...@@ -485,6 +489,7 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase { ...@@ -485,6 +489,7 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -20,9 +20,12 @@ ...@@ -20,9 +20,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -116,6 +119,7 @@ struct CWiseFunctor : CWiseFunctorBase { ...@@ -116,6 +119,7 @@ struct CWiseFunctor : CWiseFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase { struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase {
CWiseFunctor(const CWiseType type, const float coeff) CWiseFunctor(const CWiseType type, const float coeff)
...@@ -130,6 +134,7 @@ struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase { ...@@ -130,6 +134,7 @@ struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -18,10 +18,13 @@ ...@@ -18,10 +18,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -112,6 +115,7 @@ struct DepthToSpaceOpFunctor { ...@@ -112,6 +115,7 @@ struct DepthToSpaceOpFunctor {
bool d2s_; bool d2s_;
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> { struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> {
DepthToSpaceOpFunctor(const int block_size, bool d2s) DepthToSpaceOpFunctor(const int block_size, bool d2s)
...@@ -125,6 +129,7 @@ struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> { ...@@ -125,6 +129,7 @@ struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -23,11 +23,14 @@ ...@@ -23,11 +23,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/conv_pool_2d_util.h"
#include "mace/kernels/activation.h" #include "mace/kernels/activation.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -441,6 +444,7 @@ struct DepthwiseConv2dFunctor<DeviceType::NEON, float> ...@@ -441,6 +444,7 @@ struct DepthwiseConv2dFunctor<DeviceType::NEON, float>
StatsFuture *future); StatsFuture *future);
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T> struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
: DepthwiseConv2dFunctorBase { : DepthwiseConv2dFunctorBase {
...@@ -468,6 +472,7 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T> ...@@ -468,6 +472,7 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -20,9 +20,12 @@ ...@@ -20,9 +20,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -105,6 +108,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { ...@@ -105,6 +108,7 @@ struct EltwiseFunctor : EltwiseFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase { struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
EltwiseFunctor(const EltwiseType type, const std::vector<float> &coeff) EltwiseFunctor(const EltwiseType type, const std::vector<float> &coeff)
...@@ -120,6 +124,7 @@ struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase { ...@@ -120,6 +124,7 @@ struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -19,16 +19,19 @@ ...@@ -19,16 +19,19 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/kernels/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/kernels/opencl/helper.h" #include "mace/kernels/opencl/helper.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
struct FullyConnectedBase { struct FullyConnectedBase {
FullyConnectedBase(const BufferType weight_type, FullyConnectedBase(const int /*BufferType*/ weight_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: weight_type_(weight_type), : weight_type_(weight_type),
...@@ -42,7 +45,7 @@ struct FullyConnectedBase { ...@@ -42,7 +45,7 @@ struct FullyConnectedBase {
template <DeviceType D, typename T> template <DeviceType D, typename T>
struct FullyConnectedFunctor : FullyConnectedBase { struct FullyConnectedFunctor : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type, FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {} : FullyConnectedBase(weight_type, activation, relux_max_limit) {}
...@@ -89,7 +92,7 @@ struct FullyConnectedFunctor : FullyConnectedBase { ...@@ -89,7 +92,7 @@ struct FullyConnectedFunctor : FullyConnectedBase {
template <> template <>
struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase { struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type, FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {} : FullyConnectedBase(weight_type, activation, relux_max_limit) {}
...@@ -101,9 +104,10 @@ struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase { ...@@ -101,9 +104,10 @@ struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase {
StatsFuture *future); StatsFuture *future);
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase { struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type, FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {} : FullyConnectedBase(weight_type, activation, relux_max_limit) {}
...@@ -120,6 +124,7 @@ struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase { ...@@ -120,6 +124,7 @@ struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <math.h> #include <math.h>
#include <algorithm> #include <algorithm>
#include <cstring>
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
......
...@@ -10,10 +10,13 @@ ...@@ -10,10 +10,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
......
...@@ -25,11 +25,14 @@ ...@@ -25,11 +25,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/gemm.h" #include "mace/kernels/gemm.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -70,6 +73,7 @@ struct MatMulFunctor { ...@@ -70,6 +73,7 @@ struct MatMulFunctor {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template<typename T> template<typename T>
struct MatMulFunctor<DeviceType::OPENCL, T> { struct MatMulFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *A, void operator()(const Tensor *A,
...@@ -81,6 +85,7 @@ struct MatMulFunctor<DeviceType::OPENCL, T> { ...@@ -81,6 +85,7 @@ struct MatMulFunctor<DeviceType::OPENCL, T> {
uint32_t kwg_size_; uint32_t kwg_size_;
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -20,9 +20,12 @@ ...@@ -20,9 +20,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -79,6 +82,7 @@ struct PadFunctor : public PadFunctorBase { ...@@ -79,6 +82,7 @@ struct PadFunctor : public PadFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase { struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase {
PadFunctor(const std::vector<int> &paddings, PadFunctor(const std::vector<int> &paddings,
...@@ -94,6 +98,7 @@ struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase { ...@@ -94,6 +98,7 @@ struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -21,10 +21,13 @@ ...@@ -21,10 +21,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/conv_pool_2d_util.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
enum PoolingType { enum PoolingType {
...@@ -192,6 +195,7 @@ struct PoolingFunctor<DeviceType::NEON, float> : PoolingFunctorBase { ...@@ -192,6 +195,7 @@ struct PoolingFunctor<DeviceType::NEON, float> : PoolingFunctorBase {
StatsFuture *future); StatsFuture *future);
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase { struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
PoolingFunctor(const PoolingType pooling_type, PoolingFunctor(const PoolingType pooling_type,
...@@ -212,6 +216,7 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase { ...@@ -212,6 +216,7 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -18,9 +18,12 @@ ...@@ -18,9 +18,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
......
...@@ -18,9 +18,12 @@ ...@@ -18,9 +18,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
......
...@@ -20,9 +20,12 @@ ...@@ -20,9 +20,12 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -177,6 +180,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { ...@@ -177,6 +180,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct ResizeBilinearFunctor<DeviceType::OPENCL, T> struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
: ResizeBilinearFunctorBase { : ResizeBilinearFunctorBase {
...@@ -190,6 +194,7 @@ struct ResizeBilinearFunctor<DeviceType::OPENCL, T> ...@@ -190,6 +194,7 @@ struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -20,11 +20,14 @@ ...@@ -20,11 +20,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -81,6 +84,7 @@ struct SliceFunctor : SliceFunctorBase { ...@@ -81,6 +84,7 @@ struct SliceFunctor : SliceFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template<typename T> template<typename T>
struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase { struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase {
explicit SliceFunctor(const int32_t axis) : SliceFunctorBase(axis) {} explicit SliceFunctor(const int32_t axis) : SliceFunctorBase(axis) {}
...@@ -92,6 +96,7 @@ struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase { ...@@ -92,6 +96,7 @@ struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase {
uint32_t kwg_size_; uint32_t kwg_size_;
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -21,11 +21,14 @@ ...@@ -21,11 +21,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -72,6 +75,7 @@ struct SoftmaxFunctor<DeviceType::NEON, float> { ...@@ -72,6 +75,7 @@ struct SoftmaxFunctor<DeviceType::NEON, float> {
void operator()(const Tensor *logits, Tensor *output, StatsFuture *future); void operator()(const Tensor *logits, Tensor *output, StatsFuture *future);
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct SoftmaxFunctor<DeviceType::OPENCL, T> { struct SoftmaxFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *logits, Tensor *output, StatsFuture *future); void operator()(const Tensor *logits, Tensor *output, StatsFuture *future);
...@@ -81,6 +85,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> { ...@@ -81,6 +85,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -19,10 +19,13 @@ ...@@ -19,10 +19,13 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -54,6 +57,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { ...@@ -54,6 +57,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase { struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
SpaceToBatchFunctor(const std::vector<int> &paddings, SpaceToBatchFunctor(const std::vector<int> &paddings,
...@@ -71,6 +75,7 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase { ...@@ -71,6 +75,7 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> space_shape_; std::vector<index_t> space_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -19,11 +19,14 @@ ...@@ -19,11 +19,14 @@
#include <vector> #include <vector>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/activation.h" #include "mace/kernels/activation.h"
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/conv_pool_2d_util.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace kernels { namespace kernels {
...@@ -52,6 +55,7 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase { ...@@ -52,6 +55,7 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct WinogradTransformFunctor<DeviceType::OPENCL, T> struct WinogradTransformFunctor<DeviceType::OPENCL, T>
: WinogradTransformFunctorBase { : WinogradTransformFunctorBase {
...@@ -66,6 +70,7 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T> ...@@ -66,6 +70,7 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
struct WinogradInverseTransformFunctorBase { struct WinogradInverseTransformFunctorBase {
WinogradInverseTransformFunctorBase(const int batch, WinogradInverseTransformFunctorBase(const int batch,
...@@ -104,6 +109,7 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase { ...@@ -104,6 +109,7 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T> template <typename T>
struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T> struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
: WinogradInverseTransformFunctorBase { : WinogradInverseTransformFunctorBase {
...@@ -125,6 +131,7 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T> ...@@ -125,6 +131,7 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_; std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
}; };
#endif // MACE_ENABLE_OPENCL
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -28,8 +28,14 @@ cc_library( ...@@ -28,8 +28,14 @@ cc_library(
exclude = [ exclude = [
"*_test.cc", "*_test.cc",
"*_benchmark.cc", "*_benchmark.cc",
"buffer_to_image.cc",
"image_to_buffer.cc",
], ],
), ) + if_android(
[
"buffer_to_image.cc",
"image_to_buffer.cc",
]),
hdrs = glob( hdrs = glob(
["*.h"], ["*.h"],
exclude = ["ops_test_util.h"], exclude = ["ops_test_util.h"],
......
...@@ -24,6 +24,7 @@ void Register_Activation(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_Activation(OperatorRegistry *op_registry) {
.Build(), .Build(),
ActivationOp<DeviceType::CPU, float>); ActivationOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_Activation(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_Activation(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
ActivationOp<DeviceType::OPENCL, half>); ActivationOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_AddN(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_AddN(OperatorRegistry *op_registry) {
.Build(), .Build(),
AddNOp<DeviceType::CPU, float>); AddNOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_AddN(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_AddN(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
AddNOp<DeviceType::OPENCL, half>); AddNOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
......
...@@ -24,6 +24,7 @@ void Register_BatchNorm(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
.Build(), .Build(),
BatchNormOp<DeviceType::CPU, float>); BatchNormOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_BatchNorm(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
BatchNormOp<DeviceType::OPENCL, half>); BatchNormOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -18,6 +18,7 @@ namespace mace { ...@@ -18,6 +18,7 @@ namespace mace {
namespace ops { namespace ops {
void Register_BatchToSpaceND(OperatorRegistry *op_registry) { void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -28,6 +29,7 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) { ...@@ -28,6 +29,7 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, half>); BatchToSpaceNDOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,6 +24,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
.Build(), .Build(),
BiasAddOp<DeviceType::CPU, float>); BiasAddOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
BiasAddOp<DeviceType::OPENCL, half>); BiasAddOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -23,16 +23,20 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) { ...@@ -23,16 +23,20 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ChannelShuffleOp<DeviceType::CPU, float>); ChannelShuffleOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ChannelShuffleOp<DeviceType::OPENCL, float>); ChannelShuffleOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
ChannelShuffleOp<DeviceType::OPENCL, half>); ChannelShuffleOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -23,16 +23,21 @@ void Register_Concat(OperatorRegistry *op_registry) { ...@@ -23,16 +23,21 @@ void Register_Concat(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ConcatOp<DeviceType::CPU, float>); ConcatOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ConcatOp<DeviceType::OPENCL, float>); ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
ConcatOp<DeviceType::OPENCL, half>); ConcatOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
.Build(), .Build(),
Conv2dOp<DeviceType::CPU, float>); Conv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
Conv2dOp<DeviceType::OPENCL, half>); Conv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
......
...@@ -24,6 +24,7 @@ void Register_CWise(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_CWise(OperatorRegistry *op_registry) {
.Build(), .Build(),
CWiseOp<DeviceType::CPU, float>); CWiseOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("CWise") REGISTER_OPERATOR(op_registry, OpKeyBuilder("CWise")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_CWise(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_CWise(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
CWiseOp<DeviceType::OPENCL, half>); CWiseOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,6 +24,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.Build(), .Build(),
DepthToSpaceOp<DeviceType::CPU, float>); DepthToSpaceOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
DepthToSpaceOp<DeviceType::OPENCL, half>); DepthToSpaceOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,6 +24,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
.Build(), .Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>); DepthwiseConv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, half>); DepthwiseConv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
......
...@@ -24,6 +24,7 @@ void Register_Eltwise(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
.Build(), .Build(),
EltwiseOp<DeviceType::CPU, float>); EltwiseOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_Eltwise(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
EltwiseOp<DeviceType::OPENCL, half>); EltwiseOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
.Build(), .Build(),
FoldedBatchNormOp<DeviceType::CPU, float>); FoldedBatchNormOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
FoldedBatchNormOp<DeviceType::OPENCL, half>); FoldedBatchNormOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
.Build(), .Build(),
FullyConnectedOp<DeviceType::CPU, float>); FullyConnectedOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
FullyConnectedOp<DeviceType::OPENCL, half>); FullyConnectedOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
......
...@@ -28,10 +28,9 @@ class FullyConnectedOp : public Operator<D, T> { ...@@ -28,10 +28,9 @@ class FullyConnectedOp : public Operator<D, T> {
public: public:
FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws) FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws), : Operator<D, T>(operator_def, ws),
functor_(static_cast<kernels::BufferType>( functor_(OperatorBase::GetSingleArgument<int>(
OperatorBase::GetSingleArgument<int>( "weight_type",
"weight_type", static_cast<int>( 7 /*static_cast<int>(kernels::WEIGHT_WIDTH)*/),
kernels::WEIGHT_WIDTH))),
kernels::StringToActivationType( kernels::StringToActivationType(
OperatorBase::GetSingleArgument<std::string>("activation", OperatorBase::GetSingleArgument<std::string>("activation",
"NOOP")), "NOOP")),
......
...@@ -24,6 +24,7 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) {
.Build(), .Build(),
FusedConv2dOp<DeviceType::CPU, float>); FusedConv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
FusedConv2dOp<DeviceType::OPENCL, half>); FusedConv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_MatMul(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_MatMul(OperatorRegistry *op_registry) {
.Build(), .Build(),
MatMulOp<DeviceType::CPU, float>); MatMulOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("MatMul") REGISTER_OPERATOR(op_registry, OpKeyBuilder("MatMul")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_MatMul(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_MatMul(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
MatMulOp<DeviceType::OPENCL, half>); MatMulOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,6 +24,7 @@ void Register_Pad(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_Pad(OperatorRegistry *op_registry) {
.Build(), .Build(),
PadOp<DeviceType::CPU, float>); PadOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pad") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pad")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -34,6 +35,7 @@ void Register_Pad(OperatorRegistry *op_registry) { ...@@ -34,6 +35,7 @@ void Register_Pad(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
PadOp<DeviceType::OPENCL, half>); PadOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -23,22 +23,21 @@ void Register_Pooling(OperatorRegistry *op_registry) { ...@@ -23,22 +23,21 @@ void Register_Pooling(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
PoolingOp<DeviceType::CPU, float>); PoolingOp<DeviceType::CPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::CPU, half>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
PoolingOp<DeviceType::OPENCL, float>); PoolingOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
PoolingOp<DeviceType::OPENCL, half>); PoolingOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) {
.Build(), .Build(),
ResizeBilinearOp<DeviceType::CPU, float>); ResizeBilinearOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
ResizeBilinearOp<DeviceType::OPENCL, half>); ResizeBilinearOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,16 +24,20 @@ void Register_Slice(OperatorRegistry *op_registry) { ...@@ -24,16 +24,20 @@ void Register_Slice(OperatorRegistry *op_registry) {
.Build(), .Build(),
SliceOp<DeviceType::CPU, float>); SliceOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
SliceOp<DeviceType::OPENCL, float>); SliceOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
SliceOp<DeviceType::OPENCL, half>); SliceOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -24,6 +24,7 @@ void Register_Softmax(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_Softmax(OperatorRegistry *op_registry) {
.Build(), .Build(),
SoftmaxOp<DeviceType::CPU, float>); SoftmaxOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,8 @@ void Register_Softmax(OperatorRegistry *op_registry) { ...@@ -35,6 +36,8 @@ void Register_Softmax(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
SoftmaxOp<DeviceType::OPENCL, half>); SoftmaxOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax")
.Device(DeviceType::NEON) .Device(DeviceType::NEON)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -18,16 +18,19 @@ namespace mace { ...@@ -18,16 +18,19 @@ namespace mace {
namespace ops { namespace ops {
// Registers the "SpaceToBatchND" operator with `op_registry`.
//
// Only DeviceType::OPENCL variants are registered here (T = float and
// T = half), and both sit inside the MACE_ENABLE_OPENCL guard. When that
// macro is not defined this function registers nothing and `op_registry`
// is left unreferenced.
// NOTE(review): an unreferenced parameter can trigger -Wunused-parameter in
// builds without OpenCL -- confirm against the project's warning flags.
void Register_SpaceToBatchND(OperatorRegistry *op_registry) { void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
// OPENCL, T = float.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, float>); SpaceToBatchNDOp<DeviceType::OPENCL, float>);
// OPENCL, T = half.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, half>); SpaceToBatchNDOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -24,6 +24,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) { ...@@ -24,6 +24,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) {
.Build(), .Build(),
SpaceToDepthOp<DeviceType::CPU, float>); SpaceToDepthOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToDepth") REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToDepth")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
...@@ -35,6 +36,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) { ...@@ -35,6 +36,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
SpaceToDepthOp<DeviceType::OPENCL, half>); SpaceToDepthOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -18,16 +18,19 @@ namespace mace { ...@@ -18,16 +18,19 @@ namespace mace {
namespace ops { namespace ops {
// Registers the "WinogradInverseTransform" operator with `op_registry`.
//
// Only DeviceType::OPENCL variants are registered here (T = float and
// T = half), and both sit inside the MACE_ENABLE_OPENCL guard. When that
// macro is not defined this function registers nothing and `op_registry`
// is left unreferenced.
// NOTE(review): an unreferenced parameter can trigger -Wunused-parameter in
// builds without OpenCL -- confirm against the project's warning flags.
void Register_WinogradInverseTransform(OperatorRegistry *op_registry) { void Register_WinogradInverseTransform(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
// OPENCL, T = float.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform") REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
WinogradInverseTransformOp<DeviceType::OPENCL, float>); WinogradInverseTransformOp<DeviceType::OPENCL, float>);
// OPENCL, T = half.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform") REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
WinogradInverseTransformOp<DeviceType::OPENCL, half>); WinogradInverseTransformOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -18,16 +18,19 @@ namespace mace { ...@@ -18,16 +18,19 @@ namespace mace {
namespace ops { namespace ops {
void Register_WinogradTransform(OperatorRegistry *op_registry) { void Register_WinogradTransform(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform") REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
WinogradTransformOp<DeviceType::OPENCL, float>); WinogradTransformOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform") REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform")
.Device(DeviceType::OPENCL) .Device(DeviceType::OPENCL)
.TypeConstraint<half>("T") .TypeConstraint<half>("T")
.Build(), .Build(),
WinogradTransformOp<DeviceType::OPENCL, half>); WinogradTransformOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
# Examples # Examples
load("//mace:mace.bzl", "if_openmp_enabled") load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
cc_binary( cc_binary(
name = "mace_run", name = "mace_run",
srcs = ["mace_run.cc"], srcs = ["mace_run.cc"],
linkopts = if_openmp_enabled(["-fopenmp"]), linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
copts = if_android(["-DMACE_ENABLE_OPENCL"]),
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
......
...@@ -33,12 +33,15 @@ ...@@ -33,12 +33,15 @@
#include <numeric> #include <numeric>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead // #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace { namespace mace {
namespace MACE_MODEL_TAG { namespace MACE_MODEL_TAG {
...@@ -113,6 +116,7 @@ DeviceType ParseDeviceType(const std::string &device_str) { ...@@ -113,6 +116,7 @@ DeviceType ParseDeviceType(const std::string &device_str) {
} }
} }
#ifdef MACE_ENABLE_OPENCL
void WriteOpenCLPlatformInfo(const std::string &output_dir) { void WriteOpenCLPlatformInfo(const std::string &output_dir) {
std::string platform_info = OpenCLRuntime::Global()->platform_info(); std::string platform_info = OpenCLRuntime::Global()->platform_info();
const std::string cl_platform_info_file_name = output_dir const std::string cl_platform_info_file_name = output_dir
...@@ -126,6 +130,7 @@ void WriteOpenCLPlatformInfo(const std::string &output_dir) { ...@@ -126,6 +130,7 @@ void WriteOpenCLPlatformInfo(const std::string &output_dir) {
LOG(WARNING) << "Write opencl platform info failed."; LOG(WARNING) << "Write opencl platform info failed.";
} }
} }
#endif // MACE_ENABLE_OPENCL
struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo(); struct mallinfo curr = mallinfo();
...@@ -225,11 +230,13 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -225,11 +230,13 @@ bool RunModel(const std::vector<std::string> &input_names,
mace::SetOpenMPThreadPolicy( mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads, FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy)); static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::OPENCL) { if (device_type == DeviceType::OPENCL) {
mace::SetGPUHints( mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint), static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint)); static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} }
#endif // MACE_ENABLE_OPENCL
const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH"); const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH");
const std::string kernel_file_path = const std::string kernel_file_path =
...@@ -318,9 +325,11 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -318,9 +325,11 @@ bool RunModel(const std::vector<std::string> &input_names,
printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis, printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis); mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::OPENCL) { if (device_type == DeviceType::OPENCL) {
WriteOpenCLPlatformInfo(kernel_file_path); WriteOpenCLPlatformInfo(kernel_file_path);
} }
#endif // MACE_ENABLE_OPENCL
for (size_t i = 0; i < output_count; ++i) { for (size_t i = 0; i < output_count; ++i) {
std::string output_name = std::string output_name =
......
...@@ -34,26 +34,30 @@ from ConfigParser import ConfigParser ...@@ -34,26 +34,30 @@ from ConfigParser import ConfigParser
def get_target_socs(configs): def get_target_socs(configs):
available_socs = sh_commands.adb_get_all_socs() if "host" in configs["target_abis"]:
target_socs = available_socs return [""]
if hasattr(configs, "target_socs"): else:
target_socs = set(configs["target_socs"]) available_socs = sh_commands.adb_get_all_socs()
target_socs = target_socs & available_socs target_socs = available_socs
if hasattr(configs, "target_socs"):
if FLAGS.target_socs != "all": target_socs = set(configs["target_socs"])
socs = set(FLAGS.target_socs.split(',')) target_socs = target_socs & available_socs
target_socs = target_socs & socs
missing_socs = socs.difference(target_socs) if FLAGS.target_socs != "all":
if len(missing_socs) > 0: socs = set(FLAGS.target_socs.split(','))
print( target_socs = target_socs & socs
"Error: devices with SoCs are not connected %s" % missing_socs) missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0:
print(
"Error: devices with SoCs are not connected %s" %
missing_socs)
exit(1)
if not target_socs:
print("Error: no device to run")
exit(1) exit(1)
if not target_socs: return target_socs
print("Error: no device to run")
exit(1)
return target_socs
def get_data_and_device_type(runtime): def get_data_and_device_type(runtime):
...@@ -339,8 +343,8 @@ def parse_args(): ...@@ -339,8 +343,8 @@ def parse_args():
def process_models(project_name, configs, embed_model_data, vlog_level, def process_models(project_name, configs, embed_model_data, vlog_level,
target_soc, target_abi, serialno, phone_data_dir, target_abi, phone_data_dir, option_args,
option_args): target_soc="", serialno="", device_name=""):
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = [] model_output_dirs = []
for model_name in configs["models"]: for model_name in configs["models"]:
...@@ -358,11 +362,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -358,11 +362,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
# Create model build directory # Create model build directory
model_path_digest = md5sum(model_config["model_file_path"]) model_path_digest = md5sum(model_config["model_file_path"])
device_name = sh_commands.adb_get_device_name_by_serialno(serialno)
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % ( if target_abi == "host":
FLAGS.output_dir, project_name, "build", model_output_dir = "%s/%s/%s/%s/%s/%s" % (
model_name, model_path_digest, device_name.replace(' ', ''), FLAGS.output_dir, project_name, "build",
target_soc, target_abi) model_name, model_path_digest, target_abi)
else:
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, device_name.replace(' ', ''),
target_soc, target_abi)
model_output_dirs.append(model_output_dir) model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
...@@ -563,15 +572,19 @@ def main(unused_args): ...@@ -563,15 +572,19 @@ def main(unused_args):
print("Run on device: %s, %s, %s" % ( print("Run on device: %s, %s, %s" % (
serialno, props["ro.board.platform"], serialno, props["ro.board.platform"],
props["ro.product.model"])) props["ro.product.model"]))
device_name = \
sh_commands.adb_get_device_name_by_serialno(
serialno)
process_models(project_name, configs, embed_model_data, process_models(project_name, configs, embed_model_data,
vlog_level, target_soc, target_abi, vlog_level, target_abi, phone_data_dir,
serialno, phone_data_dir, option_args) option_args, target_soc, serialno,
device_name)
else: else:
print("====================================================") print("====================================================")
print("Run on host") print("Run on host")
process_models(project_name, configs, embed_model_data, process_models(project_name, configs, embed_model_data,
vlog_level, target_soc, target_abi, '', vlog_level, target_abi, phone_data_dir,
phone_data_dir, option_args) option_args)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
sh_commands.packaging_lib(FLAGS.output_dir, project_name) sh_commands.packaging_lib(FLAGS.output_dir, project_name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册