提交 3f418e2d 编写于 作者: Y yejianwu

fix broken host run

上级 7c1711d8
......@@ -52,10 +52,10 @@ cc_library(
]),
deps = [
"//mace/utils",
"//mace/codegen:generated_version",
] + if_android([
":opencl_headers",
"//mace/codegen:generated_opencl",
"//mace/codegen:generated_version",
"@half//:half",
]) + if_production_mode([
"//mace/codegen:generated_tuning_params",
......
......@@ -79,7 +79,6 @@ extern void Register_AddN(OperatorRegistry *op_registry);
extern void Register_BatchNorm(OperatorRegistry *op_registry);
extern void Register_BatchToSpaceND(OperatorRegistry *op_registry);
extern void Register_BiasAdd(OperatorRegistry *op_registry);
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ChannelShuffle(OperatorRegistry *op_registry);
extern void Register_Concat(OperatorRegistry *op_registry);
extern void Register_Conv2D(OperatorRegistry *op_registry);
......@@ -92,7 +91,6 @@ extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry);
extern void Register_FullyConnected(OperatorRegistry *op_registry);
extern void Register_FusedConv2D(OperatorRegistry *op_registry);
extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
extern void Register_LocalResponseNorm(OperatorRegistry *op_registry);
extern void Register_MatMul(OperatorRegistry *op_registry);
extern void Register_Pad(OperatorRegistry *op_registry);
......@@ -111,6 +109,11 @@ extern void Register_SpaceToDepth(OperatorRegistry *op_registry);
extern void Register_Transpose(OperatorRegistry *op_registry);
extern void Register_WinogradInverseTransform(OperatorRegistry *op_registry);
extern void Register_WinogradTransform(OperatorRegistry *op_registry);
#ifdef MACE_ENABLE_OPENCL
extern void Register_BufferToImage(OperatorRegistry *op_registry);
extern void Register_ImageToBuffer(OperatorRegistry *op_registry);
#endif // MACE_ENABLE_OPENCL
} // namespace ops
OperatorRegistry::OperatorRegistry() {
......@@ -120,7 +123,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_BatchNorm(this);
ops::Register_BatchToSpaceND(this);
ops::Register_BiasAdd(this);
ops::Register_BufferToImage(this);
ops::Register_ChannelShuffle(this);
ops::Register_Concat(this);
ops::Register_Conv2D(this);
......@@ -133,7 +135,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_FullyConnected(this);
ops::Register_FusedConv2D(this);
ops::Register_GlobalAvgPooling(this);
ops::Register_ImageToBuffer(this);
ops::Register_LocalResponseNorm(this);
ops::Register_MatMul(this);
ops::Register_Pad(this);
......@@ -152,6 +153,11 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_Transpose(this);
ops::Register_WinogradInverseTransform(this);
ops::Register_WinogradTransform(this);
#ifdef MACE_ENABLE_OPENCL
ops::Register_BufferToImage(this);
ops::Register_ImageToBuffer(this);
#endif // MACE_ENABLE_OPENCL
}
} // namespace mace
......@@ -16,6 +16,7 @@
#include <omp.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <algorithm>
#include <utility>
......@@ -86,7 +87,7 @@ void SetThreadAffinity(cpu_set_t mask) {
#if defined(__ANDROID__)
pid_t pid = gettid();
#else
pid_t pid = pthread_self();
pid_t pid = syscall(SYS_gettid);
#endif
int err = sched_setaffinity(pid, sizeof(mask), &mask);
MACE_CHECK(err == 0, "set affinity error: ", errno);
......
......@@ -14,20 +14,30 @@ cc_library(
srcs = glob(
[
"*.cc",
"opencl/*.cc",
"arm/*.cc",
],
exclude = [
"*_test.cc",
"arm/*_test.cc",
],
) + if_android(glob([
"opencl/*.cc",
],
exclude = [
"opencl/*_test.cc",
])),
hdrs = glob(
[
"*.h",
"arm/*.h",
],
),
hdrs = glob([
"*.h",
"opencl/*.h",
"arm/*.h",
]),
exclude = [
"buffer_to_image.h",
],
) + if_android(glob([
"opencl/*.h",
"buffer_to_image.h",
])),
copts = if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) +
......
......@@ -21,10 +21,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -166,6 +169,7 @@ class ActivationFunctor<DeviceType::NEON, float> {
float relux_max_limit_;
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class ActivationFunctor<DeviceType::OPENCL, T> {
public:
......@@ -186,6 +190,7 @@ class ActivationFunctor<DeviceType::OPENCL, T> {
std::string tuning_key_prefix_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -23,9 +23,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -88,6 +91,7 @@ struct AddNFunctor {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct AddNFunctor<DeviceType::OPENCL, T> {
void operator()(const std::vector<const Tensor *> &input_tensors,
......@@ -99,6 +103,7 @@ struct AddNFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -22,11 +22,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -159,7 +162,7 @@ struct BatchNormFunctor<DeviceType::NEON, float> : BatchNormFunctorBase {
StatsFuture *future);
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
BatchNormFunctor(const bool folded_constant,
......@@ -179,6 +182,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -19,10 +19,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -68,6 +71,7 @@ void BiasAddFunctor<DeviceType::NEON, float>::operator()(
StatsFuture *future);
*/
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct BiasAddFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *input,
......@@ -79,6 +83,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -60,6 +60,7 @@ struct ChannelShuffleFunctor {
const int groups_;
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {}
......@@ -72,6 +73,7 @@ struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
const int groups_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -19,11 +19,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -88,6 +91,7 @@ struct ConcatFunctor : ConcatFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {}
......@@ -100,6 +104,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -23,12 +23,15 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -457,6 +460,7 @@ struct Conv2dFunctor<DeviceType::NEON, float> : Conv2dFunctorBase {
ScratchBuffer *scratch_;
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
Conv2dFunctor(const int *strides,
......@@ -485,6 +489,7 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -20,9 +20,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -116,6 +119,7 @@ struct CWiseFunctor : CWiseFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase {
CWiseFunctor(const CWiseType type, const float coeff)
......@@ -130,6 +134,7 @@ struct CWiseFunctor<DeviceType::OPENCL, T> : CWiseFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -18,10 +18,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -112,6 +115,7 @@ struct DepthToSpaceOpFunctor {
bool d2s_;
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> {
DepthToSpaceOpFunctor(const int block_size, bool d2s)
......@@ -125,6 +129,7 @@ struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -23,11 +23,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/kernels/activation.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -441,6 +444,7 @@ struct DepthwiseConv2dFunctor<DeviceType::NEON, float>
StatsFuture *future);
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
: DepthwiseConv2dFunctorBase {
......@@ -468,6 +472,7 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -20,9 +20,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -105,6 +108,7 @@ struct EltwiseFunctor : EltwiseFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
EltwiseFunctor(const EltwiseType type, const std::vector<float> &coeff)
......@@ -120,6 +124,7 @@ struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -19,16 +19,19 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/kernels/opencl/helper.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
struct FullyConnectedBase {
FullyConnectedBase(const BufferType weight_type,
FullyConnectedBase(const int /*BufferType*/ weight_type,
const ActivationType activation,
const float relux_max_limit)
: weight_type_(weight_type),
......@@ -42,7 +45,7 @@ struct FullyConnectedBase {
template <DeviceType D, typename T>
struct FullyConnectedFunctor : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type,
FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation,
const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {}
......@@ -89,7 +92,7 @@ struct FullyConnectedFunctor : FullyConnectedBase {
template <>
struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type,
FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation,
const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {}
......@@ -101,9 +104,10 @@ struct FullyConnectedFunctor<DeviceType::NEON, float> : FullyConnectedBase {
StatsFuture *future);
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
FullyConnectedFunctor(const BufferType weight_type,
FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation,
const float relux_max_limit)
: FullyConnectedBase(weight_type, activation, relux_max_limit) {}
......@@ -120,6 +124,7 @@ struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
std::vector<index_t> input_shape_;
std::unique_ptr<BufferBase> kernel_error_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -14,6 +14,7 @@
#include <math.h>
#include <algorithm>
#include <cstring>
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
......
......@@ -10,10 +10,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......
......@@ -25,11 +25,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/gemm.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -70,6 +73,7 @@ struct MatMulFunctor {
}
};
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct MatMulFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *A,
......@@ -81,6 +85,7 @@ struct MatMulFunctor<DeviceType::OPENCL, T> {
uint32_t kwg_size_;
std::unique_ptr<BufferBase> kernel_error_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -20,9 +20,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -79,6 +82,7 @@ struct PadFunctor : public PadFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase {
PadFunctor(const std::vector<int> &paddings,
......@@ -94,6 +98,7 @@ struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -21,10 +21,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/conv_pool_2d_util.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
enum PoolingType {
......@@ -192,6 +195,7 @@ struct PoolingFunctor<DeviceType::NEON, float> : PoolingFunctorBase {
StatsFuture *future);
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
PoolingFunctor(const PoolingType pooling_type,
......@@ -212,6 +216,7 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -18,9 +18,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......
......@@ -18,9 +18,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......
......@@ -20,9 +20,12 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -177,6 +180,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
: ResizeBilinearFunctorBase {
......@@ -190,6 +194,7 @@ struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -20,11 +20,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -81,6 +84,7 @@ struct SliceFunctor : SliceFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase {
explicit SliceFunctor(const int32_t axis) : SliceFunctorBase(axis) {}
......@@ -92,6 +96,7 @@ struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase {
uint32_t kwg_size_;
std::unique_ptr<BufferBase> kernel_error_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -21,11 +21,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -72,6 +75,7 @@ struct SoftmaxFunctor<DeviceType::NEON, float> {
void operator()(const Tensor *logits, Tensor *output, StatsFuture *future);
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct SoftmaxFunctor<DeviceType::OPENCL, T> {
void operator()(const Tensor *logits, Tensor *output, StatsFuture *future);
......@@ -81,6 +85,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -19,10 +19,13 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -54,6 +57,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
SpaceToBatchFunctor(const std::vector<int> &paddings,
......@@ -71,6 +75,7 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> space_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -19,11 +19,14 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
#include "mace/kernels/conv_pool_2d_util.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
......@@ -52,6 +55,7 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct WinogradTransformFunctor<DeviceType::OPENCL, T>
: WinogradTransformFunctorBase {
......@@ -66,6 +70,7 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
struct WinogradInverseTransformFunctorBase {
WinogradInverseTransformFunctorBase(const int batch,
......@@ -104,6 +109,7 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
}
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
: WinogradInverseTransformFunctorBase {
......@@ -125,6 +131,7 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
std::unique_ptr<BufferBase> kernel_error_;
std::vector<index_t> input_shape_;
};
#endif // MACE_ENABLE_OPENCL
} // namespace kernels
} // namespace mace
......
......@@ -28,8 +28,14 @@ cc_library(
exclude = [
"*_test.cc",
"*_benchmark.cc",
"buffer_to_image.cc",
"image_to_buffer.cc",
],
),
) + if_android(
[
"buffer_to_image.cc",
"image_to_buffer.cc",
]),
hdrs = glob(
["*.h"],
exclude = ["ops_test_util.h"],
......
......@@ -24,6 +24,7 @@ void Register_Activation(OperatorRegistry *op_registry) {
.Build(),
ActivationOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_Activation(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
ActivationOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_AddN(OperatorRegistry *op_registry) {
.Build(),
AddNOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_AddN(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
AddNOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::NEON)
......
......@@ -24,6 +24,7 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
.Build(),
BatchNormOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
BatchNormOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -18,6 +18,7 @@ namespace mace {
namespace ops {
void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -28,6 +29,7 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,6 +24,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
.Build(),
BiasAddOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
BiasAddOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -23,16 +23,20 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T")
.Build(),
ChannelShuffleOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ChannelShuffleOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ChannelShuffleOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -23,16 +23,21 @@ void Register_Concat(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
.Build(),
Conv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::NEON)
......
......@@ -24,6 +24,7 @@ void Register_CWise(OperatorRegistry *op_registry) {
.Build(),
CWiseOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("CWise")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_CWise(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
CWiseOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,6 +24,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.Build(),
DepthToSpaceOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
DepthToSpaceOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,6 +24,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
.Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::NEON)
......
......@@ -24,6 +24,7 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
.Build(),
EltwiseOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
EltwiseOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
.Build(),
FoldedBatchNormOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
FoldedBatchNormOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
.Build(),
FullyConnectedOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
FullyConnectedOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::NEON)
......
......@@ -28,10 +28,9 @@ class FullyConnectedOp : public Operator<D, T> {
public:
FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws),
functor_(static_cast<kernels::BufferType>(
OperatorBase::GetSingleArgument<int>(
"weight_type", static_cast<int>(
kernels::WEIGHT_WIDTH))),
functor_(OperatorBase::GetSingleArgument<int>(
"weight_type",
7 /*static_cast<int>(kernels::WEIGHT_WIDTH)*/),
kernels::StringToActivationType(
OperatorBase::GetSingleArgument<std::string>("activation",
"NOOP")),
......
......@@ -24,6 +24,7 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) {
.Build(),
FusedConv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_MatMul(OperatorRegistry *op_registry) {
.Build(),
MatMulOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("MatMul")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_MatMul(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
MatMulOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,6 +24,7 @@ void Register_Pad(OperatorRegistry *op_registry) {
.Build(),
PadOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pad")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -34,6 +35,7 @@ void Register_Pad(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
PadOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -23,22 +23,21 @@ void Register_Pooling(OperatorRegistry *op_registry) {
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::CPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::CPU)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::CPU, half>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
PoolingOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) {
.Build(),
ResizeBilinearOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
ResizeBilinearOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,16 +24,20 @@ void Register_Slice(OperatorRegistry *op_registry) {
.Build(),
SliceOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
SliceOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
SliceOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -24,6 +24,7 @@ void Register_Softmax(OperatorRegistry *op_registry) {
.Build(),
SoftmaxOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,8 @@ void Register_Softmax(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
SoftmaxOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
......
......@@ -18,16 +18,19 @@ namespace mace {
namespace ops {
// Registers the SpaceToBatchND operator with the global registry.
// This op only has an OpenCL implementation here, so the entire body is
// guarded by MACE_ENABLE_OPENCL: on builds without OpenCL (e.g. host
// runs) this function compiles to a no-op instead of referencing a
// kernel that does not exist.
void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
// float32 variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, float>);
// half-precision (fp16) variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::OPENCL, half>);
#endif  // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -24,6 +24,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) {
.Build(),
SpaceToDepthOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToDepth")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
......@@ -35,6 +36,7 @@ void Register_SpaceToDepth(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
SpaceToDepthOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -18,16 +18,19 @@ namespace mace {
namespace ops {
// Registers the WinogradInverseTransform operator (the output-side half
// of the Winograd convolution pipeline). OpenCL is the only backend
// provided, so registration is compiled out entirely when
// MACE_ENABLE_OPENCL is not defined, keeping host-only builds linkable.
void Register_WinogradInverseTransform(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
// float32 variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
WinogradInverseTransformOp<DeviceType::OPENCL, float>);
// half-precision (fp16) variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
WinogradInverseTransformOp<DeviceType::OPENCL, half>);
#endif  // MACE_ENABLE_OPENCL
}
} // namespace ops
......
......@@ -18,16 +18,19 @@ namespace mace {
namespace ops {
// Registers the WinogradTransform operator (the input-side half of the
// Winograd convolution pipeline). Like its inverse counterpart, it is
// OpenCL-only, so the whole body is fenced with MACE_ENABLE_OPENCL and
// becomes a no-op on non-OpenCL (host) builds.
void Register_WinogradTransform(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
// float32 variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
WinogradTransformOp<DeviceType::OPENCL, float>);
// half-precision (fp16) variant for the OPENCL device.
REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
WinogradTransformOp<DeviceType::OPENCL, half>);
#endif  // MACE_ENABLE_OPENCL
}
} // namespace ops
......
# Examples
load("//mace:mace.bzl", "if_openmp_enabled")
load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
cc_binary(
name = "mace_run",
srcs = ["mace_run.cc"],
linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1,
copts = if_android(["-DMACE_ENABLE_OPENCL"]),
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_models",
......
......@@ -33,12 +33,15 @@
#include <numeric>
#include "gflags/gflags.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
......@@ -113,6 +116,7 @@ DeviceType ParseDeviceType(const std::string &device_str) {
}
}
#ifdef MACE_ENABLE_OPENCL
void WriteOpenCLPlatformInfo(const std::string &output_dir) {
std::string platform_info = OpenCLRuntime::Global()->platform_info();
const std::string cl_platform_info_file_name = output_dir
......@@ -126,6 +130,7 @@ void WriteOpenCLPlatformInfo(const std::string &output_dir) {
LOG(WARNING) << "Write opencl platform info failed.";
}
}
#endif // MACE_ENABLE_OPENCL
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
......@@ -225,11 +230,13 @@ bool RunModel(const std::vector<std::string> &input_names,
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::OPENCL) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
const char *kernel_path = getenv("MACE_CL_PROGRAM_PATH");
const std::string kernel_file_path =
......@@ -318,9 +325,11 @@ bool RunModel(const std::vector<std::string> &input_names,
printf("time %11.3f %11.3f %11.3f %11.3f %11.3f\n", create_net_millis,
mace_engine_ctor_millis, init_millis, warmup_millis, model_run_millis);
#ifdef MACE_ENABLE_OPENCL
if (device_type == DeviceType::OPENCL) {
WriteOpenCLPlatformInfo(kernel_file_path);
}
#endif // MACE_ENABLE_OPENCL
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
......
......@@ -34,26 +34,30 @@ from ConfigParser import ConfigParser
def get_target_socs(configs):
available_socs = sh_commands.adb_get_all_socs()
target_socs = available_socs
if hasattr(configs, "target_socs"):
target_socs = set(configs["target_socs"])
target_socs = target_socs & available_socs
if FLAGS.target_socs != "all":
socs = set(FLAGS.target_socs.split(','))
target_socs = target_socs & socs
missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0:
print(
"Error: devices with SoCs are not connected %s" % missing_socs)
if "host" in configs["target_abis"]:
return [""]
else:
available_socs = sh_commands.adb_get_all_socs()
target_socs = available_socs
if hasattr(configs, "target_socs"):
target_socs = set(configs["target_socs"])
target_socs = target_socs & available_socs
if FLAGS.target_socs != "all":
socs = set(FLAGS.target_socs.split(','))
target_socs = target_socs & socs
missing_socs = socs.difference(target_socs)
if len(missing_socs) > 0:
print(
"Error: devices with SoCs are not connected %s" %
missing_socs)
exit(1)
if not target_socs:
print("Error: no device to run")
exit(1)
if not target_socs:
print("Error: no device to run")
exit(1)
return target_socs
return target_socs
def get_data_and_device_type(runtime):
......@@ -339,8 +343,8 @@ def parse_args():
def process_models(project_name, configs, embed_model_data, vlog_level,
target_soc, target_abi, serialno, phone_data_dir,
option_args):
target_abi, phone_data_dir, option_args,
target_soc="", serialno="", device_name=""):
hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = []
for model_name in configs["models"]:
......@@ -358,11 +362,16 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
# Create model build directory
model_path_digest = md5sum(model_config["model_file_path"])
device_name = sh_commands.adb_get_device_name_by_serialno(serialno)
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, device_name.replace(' ', ''),
target_soc, target_abi)
if target_abi == "host":
model_output_dir = "%s/%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, target_abi)
else:
model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % (
FLAGS.output_dir, project_name, "build",
model_name, model_path_digest, device_name.replace(' ', ''),
target_soc, target_abi)
model_output_dirs.append(model_output_dir)
if FLAGS.mode == "build" or FLAGS.mode == "all":
......@@ -563,15 +572,19 @@ def main(unused_args):
print("Run on device: %s, %s, %s" % (
serialno, props["ro.board.platform"],
props["ro.product.model"]))
device_name = \
sh_commands.adb_get_device_name_by_serialno(
serialno)
process_models(project_name, configs, embed_model_data,
vlog_level, target_soc, target_abi,
serialno, phone_data_dir, option_args)
vlog_level, target_abi, phone_data_dir,
option_args, target_soc, serialno,
device_name)
else:
print("====================================================")
print("Run on host")
process_models(project_name, configs, embed_model_data,
vlog_level, target_soc, target_abi, '',
phone_data_dir, option_args)
vlog_level, target_abi, phone_data_dir,
option_args)
if FLAGS.mode == "build" or FLAGS.mode == "all":
sh_commands.packaging_lib(FLAGS.output_dir, project_name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册