Commit 3ba07e0b authored by liuqi

Change DeviceType::OPENCL to DeviceType::GPU; also drop the NEON device branches, remove the FusedConv2D and ReOrganize op registrations, and switch the CPU Pad functor to NCHW indexing.

Parent: d98386d6
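
For callers, the user-visible change is the device name used when registering ops and selecting a device at runtime. A minimal sketch of the updated selection logic is below (the header path and the fallback for unknown names are illustrative assumptions, not taken from this commit):

#include <string>
#include "mace/public/mace.h"  // assumed public header providing mace::DeviceType

// Sketch only: maps a device name string to the renamed enum values.
// "OPENCL" and "NEON" are no longer accepted after this change.
mace::DeviceType ParseDevice(const std::string &name) {
  if (name == "CPU") return mace::DeviceType::CPU;
  if (name == "GPU") return mace::DeviceType::GPU;  // previously "OPENCL"
  if (name == "HEXAGON") return mace::DeviceType::HEXAGON;
  return mace::DeviceType::CPU;  // illustrative fallback for unknown names
}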
......@@ -24,16 +24,16 @@ void Register_Custom_Op(OperatorRegistry *op_registry) {
Custom_Op<DeviceType::CPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("op_name")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
Custom_Op<DeviceType::OPENCL, float>);
Custom_Op<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("op_name")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
Custom_Op<DeviceType::OPENCL, half>);
Custom_Op<DeviceType::GPU, half>);
}
} // namespace ops
......
......@@ -316,7 +316,7 @@ unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
//3. Declare the device type
DeviceType device_type = DeviceType::OPENCL;
DeviceType device_type = DeviceType::GPU;
//4. Define input and output name arrays
std::vector<std::string> input_names = {...};
......@@ -350,8 +350,8 @@ for (size_t i = 0; i < output_count; ++i) {
//6. Create the MaceEngine object
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
//7. If the device type is OPENCL or HEXAGON, model_data can be released here
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
//7. If the device type is GPU or HEXAGON, model_data can be released here
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
......
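
Consolidating the numbered steps from the hunk above, a minimal end-to-end sketch of engine creation with the renamed device type (assumptions: MACE_MODEL_TAG is the generated model namespace whose header is already included, model_data has been loaded as in the earlier steps, and the tensor names are placeholders):

#include <string>
#include <vector>
#include "mace/public/mace.h"  // assumed public header for MaceEngine and DeviceType

// Sketch only: steps 3-7 from the usage snippet above, using DeviceType::GPU.
void BuildEngine(unsigned char *model_data) {
  mace::NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);

  // 3. Declare the device type (GPU replaces the old OPENCL name).
  mace::DeviceType device_type = mace::DeviceType::GPU;

  // 4. Define input and output name arrays (placeholder names).
  std::vector<std::string> input_names = {"input_node0"};
  std::vector<std::string> output_names = {"output_node0"};

  // 6. Create the MaceEngine object.
  mace::MaceEngine engine(&net_def, device_type, input_names, output_names);

  // 7. For GPU or HEXAGON, the raw model data can be released once the
  //    engine has been created.
  if (device_type == mace::DeviceType::GPU ||
      device_type == mace::DeviceType::HEXAGON) {
    mace::MACE_MODEL_TAG::UnloadModelData(model_data);
  }
}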
......@@ -108,10 +108,8 @@ inline int64_t NowMicros() {
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("NEON") == 0) {
return DeviceType::NEON;
} else if (device_str.compare("OPENCL") == 0) {
return DeviceType::OPENCL;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
......@@ -198,7 +196,7 @@ bool Run(MaceEngine *engine,
return true;
}
DEFINE_string(device, "CPU", "Device [CPU|NEON|OPENCL]");
DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]");
DEFINE_string(input_node, "input_node0,input_node1",
"input nodes, separated by comma");
DEFINE_string(output_node, "output_node0,output_node1",
......@@ -279,7 +277,7 @@ int Main(int argc, char **argv) {
mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::OPENCL) {
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
......@@ -347,7 +345,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "Run init";
std::unique_ptr<mace::MaceEngine> engine_ptr(
new mace::MaceEngine(&net_def, device_type, input_names, output_names));
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
......
......@@ -141,10 +141,8 @@ std::string FormatName(const std::string input) {
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("NEON") == 0) {
return DeviceType::NEON;
} else if (device_str.compare("OPENCL") == 0) {
return DeviceType::OPENCL;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
......@@ -277,7 +275,7 @@ int Main(int argc, char **argv) {
FLAGS_gpu_model_data_file.c_str());
NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data);
mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::OPENCL, input_names,
mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::GPU, input_names,
output_names);
mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data);
......
......@@ -34,9 +34,8 @@ Allocator *GetDeviceAllocator(DeviceType type) {
}
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::GPU, new OpenCLAllocator());
#endif
MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator());
......
......@@ -189,7 +189,7 @@ class Image : public BufferBase {
public:
Image()
: BufferBase(0),
allocator_(GetDeviceAllocator(OPENCL)),
allocator_(GetDeviceAllocator(GPU)),
buf_(nullptr),
mapped_buf_(nullptr) {}
......@@ -198,7 +198,7 @@ class Image : public BufferBase {
std::accumulate(
shape.begin(), shape.end(), 1, std::multiplies<index_t>()) *
GetEnumTypeSize(data_type)),
allocator_(GetDeviceAllocator(OPENCL)),
allocator_(GetDeviceAllocator(GPU)),
mapped_buf_(nullptr) {
shape_ = shape;
data_type_ = data_type;
......
......@@ -193,7 +193,7 @@ MaceStatus MaceEngine::Impl::Run(
input_tensors.push_back(input_tensor);
}
for (auto &output : *outputs) {
if (device_type_ == DeviceType::OPENCL) {
if (device_type_ == DeviceType::GPU) {
MACE_CHECK(output.second.shape().size() == 4,
"The outputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions");
......@@ -217,7 +217,7 @@ MaceStatus MaceEngine::Impl::Run(
#endif
#ifdef MACE_ENABLE_OPENCL
if (device_type_ == OPENCL) {
if (device_type_ == GPU) {
OpenCLRuntime::Global()->SaveBuiltCLProgram();
}
#endif
......
......@@ -54,7 +54,7 @@ bool SerialNet::Run(RunMetadata *run_metadata) {
auto &op = *iter;
MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name(), "(",
op->debug_def().type(), ")");
bool future_wait = (device_type_ == DeviceType::OPENCL &&
bool future_wait = (device_type_ == DeviceType::GPU &&
(run_metadata != nullptr ||
std::distance(iter, operators_.end()) == 1));
......
......@@ -88,7 +88,6 @@ extern void Register_Dequantize(OperatorRegistry *op_registry);
extern void Register_Eltwise(OperatorRegistry *op_registry);
extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry);
extern void Register_FullyConnected(OperatorRegistry *op_registry);
extern void Register_FusedConv2D(OperatorRegistry *op_registry);
extern void Register_LocalResponseNorm(OperatorRegistry *op_registry);
extern void Register_MatMul(OperatorRegistry *op_registry);
extern void Register_Pad(OperatorRegistry *op_registry);
......@@ -96,7 +95,6 @@ extern void Register_Pooling(OperatorRegistry *op_registry);
extern void Register_Proposal(OperatorRegistry *op_registry);
extern void Register_PSROIAlign(OperatorRegistry *op_registry);
extern void Register_Quantize(OperatorRegistry *op_registry);
extern void Register_ReOrganize(OperatorRegistry *op_registry);
extern void Register_Requantize(OperatorRegistry *op_registry);
extern void Register_Reshape(OperatorRegistry *op_registry);
extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
......@@ -130,7 +128,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_Eltwise(this);
ops::Register_FoldedBatchNorm(this);
ops::Register_FullyConnected(this);
ops::Register_FusedConv2D(this);
ops::Register_LocalResponseNorm(this);
ops::Register_MatMul(this);
ops::Register_Pad(this);
......@@ -139,7 +136,6 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_PSROIAlign(this);
ops::Register_Quantize(this);
ops::Register_Requantize(this);
ops::Register_ReOrganize(this);
ops::Register_Reshape(this);
ops::Register_ResizeBilinear(this);
ops::Register_Slice(this);
......
......@@ -82,7 +82,7 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
VLOG(3) << "Model data size: " << model_data_size;
if (model_data_size > 0) {
if (type == DeviceType::CPU || type == DeviceType::NEON) {
if (type == DeviceType::CPU) {
tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type),
model_data_ptr,
......@@ -119,7 +119,7 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
tensor_map_[const_tensor.name()] = std::move(tensor);
}
if (type == DeviceType::CPU || type == DeviceType::OPENCL) {
if (type == DeviceType::CPU || type == DeviceType::GPU) {
CreateOutputTensorBuffer(net_def, type);
}
}
......@@ -149,7 +149,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
}
MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
for (auto &mem_block : net_def.mem_arena().mem_block()) {
if (device_type == DeviceType::OPENCL) {
if (device_type == DeviceType::GPU) {
std::unique_ptr<BufferBase> image_buf(
new Image({mem_block.x(), mem_block.y()}, dtype));
preallocated_allocator_.SetBuffer(mem_block.mem_id(),
......@@ -170,7 +170,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
std::unique_ptr<Tensor> tensor
(new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
tensor->SetSourceOpName(op.name());
if (device_type == DeviceType::OPENCL) {
if (device_type == DeviceType::GPU) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
<< " Mem: " << mem_ids[i]
<< " Image shape: "
......@@ -191,7 +191,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
}
ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) {
if (device_type == CPU || device_type == NEON) {
if (device_type == CPU) {
return host_scratch_buffer_.get();
} else {
return nullptr;
......
......@@ -22,7 +22,7 @@
* --input_file=input_data \
* --output_file=mace.out \
* --model_data_file=model_data.data \
* --device=OPENCL
* --device=GPU
*/
#include <malloc.h>
#include <stdint.h>
......@@ -102,10 +102,8 @@ std::string FormatName(const std::string input) {
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("NEON") == 0) {
return DeviceType::NEON;
} else if (device_str.compare("OPENCL") == 0) {
return DeviceType::OPENCL;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
......@@ -135,7 +133,7 @@ DEFINE_string(output_file,
DEFINE_string(model_data_file,
"",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
......@@ -160,7 +158,7 @@ bool RunModel(const std::vector<std::string> &input_names,
MaceStatus res = mace::SetOpenMPThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy >(FLAGS_cpu_affinity_policy));
if (device_type == DeviceType::OPENCL) {
if (device_type == DeviceType::GPU) {
mace::SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
......@@ -178,7 +176,7 @@ bool RunModel(const std::vector<std::string> &input_names,
// Init model
mace::MaceEngine engine(&net_def, device_type, input_names,
output_names);
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
......
......@@ -162,7 +162,7 @@ class ActivationFunctor<DeviceType::CPU, float> {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class ActivationFunctor<DeviceType::OPENCL, T> {
class ActivationFunctor<DeviceType::GPU, T> {
public:
ActivationFunctor(ActivationType type, T relux_max_limit)
: activation_(type), relux_max_limit_(static_cast<T>(relux_max_limit)) {}
......
......@@ -93,7 +93,7 @@ struct AddNFunctor {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct AddNFunctor<DeviceType::OPENCL, T> {
struct AddNFunctor<DeviceType::GPU, T> {
void operator()(const std::vector<const Tensor *> &input_tensors,
Tensor *output_tensor,
StatsFuture *future);
......
......@@ -128,7 +128,7 @@ struct BatchNormFunctor<DeviceType::CPU, float> : BatchNormFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
struct BatchNormFunctor<DeviceType::GPU, T> : BatchNormFunctorBase {
BatchNormFunctor(const bool folded_constant,
const ActivationType activation,
const float relux_max_limit)
......
......@@ -65,7 +65,7 @@ struct BiasAddFunctor<DeviceType::CPU, float> {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct BiasAddFunctor<DeviceType::OPENCL, T> {
struct BiasAddFunctor<DeviceType::GPU, T> {
void operator()(const Tensor *input,
const Tensor *bias,
Tensor *output,
......
......@@ -44,7 +44,7 @@ struct BufferToImageFunctor : BufferToImageFunctorBase {
};
template <typename T>
struct BufferToImageFunctor<DeviceType::OPENCL, T> : BufferToImageFunctorBase {
struct BufferToImageFunctor<DeviceType::GPU, T> : BufferToImageFunctorBase {
explicit BufferToImageFunctor(bool i2b = false)
: BufferToImageFunctorBase(i2b) {}
void operator()(Tensor *input,
......
......@@ -67,7 +67,7 @@ struct ChannelShuffleFunctor {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
struct ChannelShuffleFunctor<DeviceType::GPU, T> {
explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future);
......
......@@ -93,7 +93,7 @@ struct ConcatFunctor : ConcatFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
struct ConcatFunctor<DeviceType::GPU, T> : ConcatFunctorBase {
explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {}
void operator()(const std::vector<const Tensor *> &input_list,
......
......@@ -615,7 +615,7 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
struct Conv2dFunctor<DeviceType::GPU, T> : Conv2dFunctorBase {
Conv2dFunctor(const int *strides,
const Padding &padding_type,
const std::vector<int> &paddings,
......
......@@ -117,7 +117,7 @@ struct DepthToSpaceOpFunctor {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct DepthToSpaceOpFunctor<DeviceType::OPENCL, T> {
struct DepthToSpaceOpFunctor<DeviceType::GPU, T> {
DepthToSpaceOpFunctor(const int block_size, bool d2s)
: block_size_(block_size), d2s_(d2s) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future);
......
......@@ -297,7 +297,7 @@ struct DepthwiseConv2dFunctor<DeviceType::CPU, float>
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
struct DepthwiseConv2dFunctor<DeviceType::GPU, T>
: DepthwiseConv2dFunctorBase {
DepthwiseConv2dFunctor(const int *strides,
const Padding padding_type,
......
......@@ -363,7 +363,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
struct EltwiseFunctor<DeviceType::GPU, T> : EltwiseFunctorBase {
EltwiseFunctor(const EltwiseType type,
const std::vector<float> &coeff,
const float value)
......
......@@ -88,7 +88,7 @@ struct FullyConnectedFunctor<DeviceType::CPU, float>: FullyConnectedBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
struct FullyConnectedFunctor<DeviceType::GPU, T> : FullyConnectedBase {
FullyConnectedFunctor(const int /*BufferType*/ weight_type,
const ActivationType activation,
const float relux_max_limit)
......
......@@ -75,7 +75,7 @@ struct MatMulFunctor {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct MatMulFunctor<DeviceType::OPENCL, T> {
struct MatMulFunctor<DeviceType::GPU, T> {
void operator()(const Tensor *A,
const Tensor *B,
Tensor *C,
......
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
void ActivationFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
const Tensor *alpha,
Tensor *output,
StatsFuture *future) {
......@@ -46,7 +46,7 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -124,7 +124,7 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
template struct ActivationFunctor<DeviceType::OPENCL, float>;
template struct ActivationFunctor<DeviceType::OPENCL, half>;
template struct ActivationFunctor<DeviceType::GPU, float>;
template struct ActivationFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -22,7 +22,7 @@ namespace mace {
namespace kernels {
template <typename T>
void AddNFunctor<DeviceType::OPENCL, T>::operator()(
void AddNFunctor<DeviceType::GPU, T>::operator()(
const std::vector<const Tensor *> &input_tensors,
Tensor *output_tensor,
StatsFuture *future) {
......@@ -58,7 +58,7 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -119,9 +119,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct AddNFunctor<DeviceType::OPENCL, float>;
template struct AddNFunctor<DeviceType::GPU, float>;
template struct AddNFunctor<DeviceType::OPENCL, half>;
template struct AddNFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
void BatchNormFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
const Tensor *scale,
const Tensor *offset,
const Tensor *mean,
......@@ -56,7 +56,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -130,7 +130,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
template struct BatchNormFunctor<DeviceType::OPENCL, float>;
template struct BatchNormFunctor<DeviceType::OPENCL, half>;
template struct BatchNormFunctor<DeviceType::GPU, float>;
template struct BatchNormFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -22,7 +22,7 @@ namespace mace {
namespace kernels {
template <typename T>
void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
void BiasAddFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
......@@ -49,7 +49,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -115,7 +115,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
template struct BiasAddFunctor<DeviceType::OPENCL, float>;
template struct BiasAddFunctor<DeviceType::OPENCL, half>;
template struct BiasAddFunctor<DeviceType::GPU, float>;
template struct BiasAddFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -20,7 +20,7 @@ namespace mace {
namespace kernels {
template <typename T>
void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
void BufferToImageFunctor<DeviceType::GPU, T>::operator()(
Tensor *buffer, const BufferType type, Tensor *image, StatsFuture *future) {
std::vector<size_t> image_shape;
......@@ -95,7 +95,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
built_options.emplace("-DOUT_OF_RANGE_CHECK");
if (!kernel_error_) {
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -177,8 +177,8 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct BufferToImageFunctor<DeviceType::OPENCL, float>;
template struct BufferToImageFunctor<DeviceType::OPENCL, half>;
template struct BufferToImageFunctor<DeviceType::GPU, float>;
template struct BufferToImageFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
void ChannelShuffleFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
Tensor *output,
StatsFuture *future) {
......@@ -56,7 +56,7 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -108,8 +108,8 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
}
template
struct ChannelShuffleFunctor<DeviceType::OPENCL, float>;
struct ChannelShuffleFunctor<DeviceType::GPU, float>;
template
struct ChannelShuffleFunctor<DeviceType::OPENCL, half>;
struct ChannelShuffleFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -50,7 +50,7 @@ static void Concat2(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......@@ -132,7 +132,7 @@ static void ConcatN(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......@@ -216,7 +216,7 @@ static void ConcatN(cl::Kernel *kernel,
}
template <typename T>
void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
void ConcatFunctor<DeviceType::GPU, T>::operator()(
const std::vector<const Tensor *> &input_list,
Tensor *output,
StatsFuture *future) {
......@@ -264,8 +264,8 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct ConcatFunctor<DeviceType::OPENCL, float>;
template struct ConcatFunctor<DeviceType::OPENCL, half>;
template struct ConcatFunctor<DeviceType::GPU, float>;
template struct ConcatFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -67,7 +67,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
std::unique_ptr<BufferBase> *kernel_error);
template <typename T>
void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
void Conv2dFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
const Tensor *filter,
const Tensor *bias,
Tensor *output,
......@@ -128,8 +128,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
template struct Conv2dFunctor<DeviceType::OPENCL, float>;
template struct Conv2dFunctor<DeviceType::OPENCL, half>;
template struct Conv2dFunctor<DeviceType::GPU, float>;
template struct Conv2dFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -61,7 +61,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......
......@@ -58,7 +58,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......
......@@ -58,7 +58,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
void DepthToSpaceOpFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input, Tensor *output, StatsFuture *future) {
const index_t batch = input->dim(0);
const index_t input_height = input->dim(1);
......@@ -86,7 +86,7 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -145,8 +145,8 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct DepthToSpaceOpFunctor<DeviceType::OPENCL, float>;
template struct DepthToSpaceOpFunctor<DeviceType::OPENCL, half>;
template struct DepthToSpaceOpFunctor<DeviceType::GPU, float>;
template struct DepthToSpaceOpFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -66,7 +66,7 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......@@ -163,7 +163,7 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
}
template <typename T>
void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
void DepthwiseConv2dFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
const Tensor *filter,
const Tensor *bias,
......@@ -215,8 +215,8 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
&kwg_size_, &kernel_error_);
}
template struct DepthwiseConv2dFunctor<DeviceType::OPENCL, float>;
template struct DepthwiseConv2dFunctor<DeviceType::OPENCL, half>;
template struct DepthwiseConv2dFunctor<DeviceType::GPU, float>;
template struct DepthwiseConv2dFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -21,7 +21,7 @@ namespace mace {
namespace kernels {
template <typename T>
void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0,
const Tensor *input1,
Tensor *output,
StatsFuture *future) {
......@@ -74,7 +74,7 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -129,7 +129,7 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
}
}
template struct EltwiseFunctor<DeviceType::OPENCL, float>;
template struct EltwiseFunctor<DeviceType::OPENCL, half>;
template struct EltwiseFunctor<DeviceType::GPU, float>;
template struct EltwiseFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -76,7 +76,7 @@ void FCWXKernel(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......@@ -202,7 +202,7 @@ void FCWTXKernel(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
(*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap();
......@@ -282,7 +282,7 @@ void FCWTXKernel(cl::Kernel *kernel,
} // namespace
template <typename T>
void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
void FullyConnectedFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
const Tensor *weight,
const Tensor *bias,
......@@ -305,9 +305,9 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct FullyConnectedFunctor<DeviceType::OPENCL, float>;
template struct FullyConnectedFunctor<DeviceType::GPU, float>;
template struct FullyConnectedFunctor<DeviceType::OPENCL, half>;
template struct FullyConnectedFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -21,7 +21,7 @@ namespace mace {
namespace kernels {
template <typename T>
void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A,
const Tensor *B,
Tensor *C,
StatsFuture *future) {
......@@ -53,7 +53,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -98,9 +98,9 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
}
}
template struct MatMulFunctor<DeviceType::OPENCL, float>;
template struct MatMulFunctor<DeviceType::GPU, float>;
template struct MatMulFunctor<DeviceType::OPENCL, half>;
template struct MatMulFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -56,7 +56,7 @@ const bool BufferToImageOpImpl(Tensor *buffer,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error->Map(nullptr);
*(kernel_error->mutable_data<char>()) = 0;
kernel_error->UnMap();
......@@ -136,13 +136,13 @@ TEST(OutOfRangeCheckTest, RandomTest) {
std::vector<index_t> buffer_shape = {batch, height, width, channels};
Workspace ws;
Tensor *buffer = ws.CreateTensor("Buffer",
GetDeviceAllocator(DeviceType::OPENCL),
GetDeviceAllocator(DeviceType::GPU),
DataTypeToEnum<float>::v());
buffer->Resize(buffer_shape);
std::vector<size_t> image_shape;
Tensor *image = ws.CreateTensor("Image",
GetDeviceAllocator(DeviceType::OPENCL),
GetDeviceAllocator(DeviceType::GPU),
DataTypeToEnum<float>::v());
CalImage2DShape(buffer->shape(), IN_OUT_CHANNEL, &image_shape);
image->ResizeImage(buffer->shape(), image_shape);
......
......@@ -21,7 +21,7 @@ namespace mace {
namespace kernels {
template<typename T>
void PadFunctor<DeviceType::OPENCL, T>::operator()(
void PadFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
Tensor *output,
StatsFuture *future) {
......@@ -59,7 +59,7 @@ void PadFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -115,9 +115,9 @@ void PadFunctor<DeviceType::OPENCL, T>::operator()(
}
template
struct PadFunctor<DeviceType::OPENCL, float>;
struct PadFunctor<DeviceType::GPU, float>;
template
struct PadFunctor<DeviceType::OPENCL, half>;
struct PadFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -22,7 +22,7 @@ namespace mace {
namespace kernels {
template <typename T>
void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
void PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
Tensor *output,
StatsFuture *future) {
MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1)
......@@ -50,7 +50,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -148,7 +148,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
template struct PoolingFunctor<DeviceType::OPENCL, float>;
template struct PoolingFunctor<DeviceType::OPENCL, half>;
template struct PoolingFunctor<DeviceType::GPU, float>;
template struct PoolingFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
void ResizeBilinearFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input, Tensor *output, StatsFuture *future) {
const index_t batch = input->dim(0);
const index_t in_height = input->dim(1);
......@@ -50,7 +50,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -113,8 +113,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct ResizeBilinearFunctor<DeviceType::OPENCL, float>;
template struct ResizeBilinearFunctor<DeviceType::OPENCL, half>;
template struct ResizeBilinearFunctor<DeviceType::GPU, float>;
template struct ResizeBilinearFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -21,7 +21,7 @@ namespace mace {
namespace kernels {
template<typename T>
void SliceFunctor<DeviceType::OPENCL, T>::operator()(
void SliceFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
const std::vector<Tensor *> &output_list,
StatsFuture *future) {
......@@ -51,7 +51,7 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -133,9 +133,9 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
}
template
struct SliceFunctor<DeviceType::OPENCL, float>;
struct SliceFunctor<DeviceType::GPU, float>;
template
struct SliceFunctor<DeviceType::OPENCL, half>;
struct SliceFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -23,7 +23,7 @@ namespace mace {
namespace kernels {
template <typename T>
void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
void SoftmaxFunctor<DeviceType::GPU, T>::operator()(const Tensor *logits,
Tensor *output,
StatsFuture *future) {
const index_t batch = logits->dim(0);
......@@ -49,7 +49,7 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -95,7 +95,7 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
}
}
template struct SoftmaxFunctor<DeviceType::OPENCL, float>;
template struct SoftmaxFunctor<DeviceType::OPENCL, half>;
template struct SoftmaxFunctor<DeviceType::GPU, float>;
template struct SoftmaxFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -25,7 +25,7 @@ namespace mace {
namespace kernels {
template <typename T>
void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
void SpaceToBatchFunctor<DeviceType::GPU, T>::operator()(
Tensor *space_tensor,
const std::vector<index_t> &output_shape,
Tensor *batch_tensor,
......@@ -60,7 +60,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -120,8 +120,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct SpaceToBatchFunctor<DeviceType::OPENCL, float>;
template struct SpaceToBatchFunctor<DeviceType::OPENCL, half>;
template struct SpaceToBatchFunctor<DeviceType::GPU, float>;
template struct SpaceToBatchFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......
......@@ -22,7 +22,7 @@ namespace mace {
namespace kernels {
template <typename T>
void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
void WinogradTransformFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future) {
auto runtime = OpenCLRuntime::Global();
......@@ -39,7 +39,7 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -117,7 +117,7 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
}
template <typename T>
void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
void WinogradInverseTransformFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input_tensor,
const Tensor *bias,
Tensor *output_tensor,
......@@ -138,7 +138,7 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1)));
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1)));
kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap();
......@@ -231,11 +231,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template struct WinogradTransformFunctor<DeviceType::OPENCL, float>;
template struct WinogradTransformFunctor<DeviceType::OPENCL, half>;
template struct WinogradTransformFunctor<DeviceType::GPU, float>;
template struct WinogradTransformFunctor<DeviceType::GPU, half>;
template struct WinogradInverseTransformFunctor<DeviceType::OPENCL, float>;
template struct WinogradInverseTransformFunctor<DeviceType::OPENCL, half>;
template struct WinogradInverseTransformFunctor<DeviceType::GPU, float>;
template struct WinogradInverseTransformFunctor<DeviceType::GPU, half>;
} // namespace kernels
} // namespace mace
......@@ -61,21 +61,21 @@ struct PadFunctor : public PadFunctorBase {
std::fill(output_ptr, output_ptr + output->size(), this->constant_value_);
const index_t batch = input->dim(0);
const index_t height = input->dim(1);
const index_t width = input->dim(2);
const index_t channel = input->dim(3);
const index_t channel = input->dim(1);
const index_t height = input->dim(2);
const index_t width = input->dim(3);
#pragma omp parallel for collapse(3)
for (index_t b = 0; b < batch; ++b) {
for (index_t h = 0; h < height; ++h) {
for (index_t w = 0; w < width; ++w) {
const index_t in_offset = (((b * height + h) * width) + w) * channel;
for (index_t c = 0; c < channel; ++c) {
for (index_t h = 0; h < height; ++h) {
const index_t in_offset = (((b * channel + c) * height) + h) * width;
const index_t out_offset = (((b + this->paddings_[0]) * output->dim(1)
+ (h + this->paddings_[2])) * output->dim(2)
+ (w + this->paddings_[4])) * output->dim(3)
+ (c + this->paddings_[2])) * output->dim(2)
+ (h + this->paddings_[4])) * output->dim(3)
+ this->paddings_[6];
memcpy(output_ptr + out_offset,
input_ptr + in_offset,
channel * sizeof(T));
width * sizeof(T));
}
}
}
......@@ -84,7 +84,7 @@ struct PadFunctor : public PadFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct PadFunctor<DeviceType::OPENCL, T> : PadFunctorBase {
struct PadFunctor<DeviceType::GPU, T> : PadFunctorBase {
PadFunctor(const std::vector<int> &paddings,
const float constant_value)
: PadFunctorBase(paddings, constant_value) {}
......
......@@ -261,7 +261,7 @@ struct PoolingFunctor<DeviceType::CPU, float>: PoolingFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
struct PoolingFunctor<DeviceType::GPU, T> : PoolingFunctorBase {
PoolingFunctor(const PoolingType pooling_type,
const int *kernels,
const int *strides,
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_KERNELS_REORGANIZE_H_
#define MACE_KERNELS_REORGANIZE_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace kernels {
template <DeviceType D, typename T>
struct ReOrganizeFunctor {
void operator()(const Tensor *input,
const std::vector<index_t> &out_shape,
Tensor *output,
StatsFuture *future) {
const bool w2c = out_shape[3] > input->dim(3);
const index_t height = input->dim(1);
const index_t input_width = input->dim(2);
const index_t input_chan = input->dim(3);
const index_t output_width = output->dim(2);
const index_t output_chan = output->dim(3);
const T *input_ptr = input->data<T>();
T *output_ptr = output->mutable_data<T>();
if (w2c) {
MACE_CHECK((out_shape[3] % input->dim(3)) == 0);
const index_t multiplier = out_shape[3] / input->dim(3);
#pragma omp parallel for collapse(4)
for (index_t n = 0; n < out_shape[0]; ++n) {
for (index_t h = 0; h < out_shape[1]; ++h) {
for (index_t w = 0; w < out_shape[2]; ++w) {
for (index_t c = 0; c < out_shape[3]; ++c) {
const index_t out_offset =
((n * height + h) * output_width + w)
* output_chan + c;
const index_t in_w_idx = w + (c % multiplier) * output_width;
const index_t in_chan_idx = c / multiplier;
const index_t in_offset =
((n * height + h) * input_width + in_w_idx)
* input_chan + in_chan_idx;
output_ptr[out_offset] = input_ptr[in_offset];
}
}
}
}
} else {
MACE_CHECK((input->dim(3) % out_shape[3]) == 0);
const index_t multiplier = input->dim(3) / out_shape[3];
#pragma omp parallel for collapse(4)
for (index_t n = 0; n < out_shape[0]; ++n) {
for (index_t h = 0; h < out_shape[1]; ++h) {
for (index_t w = 0; w < out_shape[2]; ++w) {
for (index_t c = 0; c < out_shape[3]; ++c) {
const index_t out_offset =
((n * height + h) * output_width + w)
* output_chan + c;
const index_t in_w_idx = w % input_width;
const index_t in_chan_idx = w / input_width + c * multiplier;
const index_t in_offset =
((n * height + h) * input_width + in_w_idx)
* input_chan + in_chan_idx;
output_ptr[out_offset] = input_ptr[in_offset];
}
}
}
}
}
}
};
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_REORGANIZE_H_
......@@ -179,7 +179,7 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
struct ResizeBilinearFunctor<DeviceType::GPU, T>
: ResizeBilinearFunctorBase {
ResizeBilinearFunctor(const std::vector<index_t> &size, bool align_corners)
: ResizeBilinearFunctorBase(size, align_corners) {}
......
......@@ -86,7 +86,7 @@ struct SliceFunctor : SliceFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct SliceFunctor<DeviceType::OPENCL, T> : SliceFunctorBase {
struct SliceFunctor<DeviceType::GPU, T> : SliceFunctorBase {
explicit SliceFunctor(const int32_t axis) : SliceFunctorBase(axis) {}
void operator()(const Tensor *input,
......
......@@ -94,7 +94,7 @@ struct SoftmaxFunctor<DeviceType::CPU, float> {
#ifdef MACE_ENABLE_OPENCL
template<typename T>
struct SoftmaxFunctor<DeviceType::OPENCL, T> {
struct SoftmaxFunctor<DeviceType::GPU, T> {
void operator()(const Tensor *logits, Tensor *output, StatsFuture *future);
cl::Kernel kernel_;
......
......@@ -59,7 +59,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
struct SpaceToBatchFunctor<DeviceType::GPU, T> : SpaceToBatchFunctorBase {
SpaceToBatchFunctor(const std::vector<int> &paddings,
const std::vector<int> &block_shape,
bool b2s)
......
......@@ -57,7 +57,7 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct WinogradTransformFunctor<DeviceType::OPENCL, T>
struct WinogradTransformFunctor<DeviceType::GPU, T>
: WinogradTransformFunctorBase {
WinogradTransformFunctor(const Padding &padding_type,
const std::vector<int> &paddings)
......@@ -111,7 +111,7 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
struct WinogradInverseTransformFunctor<DeviceType::GPU, T>
: WinogradInverseTransformFunctorBase {
WinogradInverseTransformFunctor(const int batch,
const int height,
......
......@@ -26,16 +26,16 @@ void Register_Activation(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
ActivationOp<DeviceType::OPENCL, float>);
ActivationOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
ActivationOp<DeviceType::OPENCL, half>);
ActivationOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -33,7 +33,7 @@ void ReluBenchmark(
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -45,7 +45,7 @@ void ReluBenchmark(
.Output("Output")
.AddStringArg("activation", "RELU")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -83,8 +83,8 @@ void ReluBenchmark(
#define BM_RELU(N, C, H, W) \
BM_RELU_MACRO(N, C, H, W, float, CPU); \
BM_RELU_MACRO(N, C, H, W, float, OPENCL); \
BM_RELU_MACRO(N, C, H, W, half, OPENCL);
BM_RELU_MACRO(N, C, H, W, float, GPU); \
BM_RELU_MACRO(N, C, H, W, half, GPU);
BM_RELU(1, 1, 512, 512);
BM_RELU(1, 3, 128, 128);
......@@ -107,7 +107,7 @@ void ReluxBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
}
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -151,8 +151,8 @@ void ReluxBenchmark(
#define BM_RELUX(N, C, H, W) \
BM_RELUX_MACRO(N, C, H, W, float, CPU); \
BM_RELUX_MACRO(N, C, H, W, float, OPENCL); \
BM_RELUX_MACRO(N, C, H, W, half, OPENCL);
BM_RELUX_MACRO(N, C, H, W, float, GPU); \
BM_RELUX_MACRO(N, C, H, W, half, GPU);
BM_RELUX(1, 1, 512, 512);
BM_RELUX(1, 3, 128, 128);
......@@ -171,7 +171,7 @@ void PreluBenchmark(
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -185,7 +185,7 @@ void PreluBenchmark(
.Output("Output")
.AddStringArg("activation", "PRELU")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
......@@ -226,8 +226,8 @@ void PreluBenchmark(
#define BM_PRELU(N, C, H, W) \
BM_PRELU_MACRO(N, C, H, W, float, CPU); \
BM_PRELU_MACRO(N, C, H, W, float, OPENCL); \
BM_PRELU_MACRO(N, C, H, W, half, OPENCL);
BM_PRELU_MACRO(N, C, H, W, float, GPU); \
BM_PRELU_MACRO(N, C, H, W, half, GPU);
BM_PRELU(1, 1, 512, 512);
BM_PRELU(1, 3, 128, 128);
......@@ -250,7 +250,7 @@ void TanhBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
}
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -292,8 +292,8 @@ void TanhBenchmark(
#define BM_TANH(N, C, H, W) \
BM_TANH_MACRO(N, C, H, W, float, CPU); \
BM_TANH_MACRO(N, C, H, W, float, OPENCL); \
BM_TANH_MACRO(N, C, H, W, half, OPENCL);
BM_TANH_MACRO(N, C, H, W, float, GPU); \
BM_TANH_MACRO(N, C, H, W, half, GPU);
BM_TANH(1, 1, 512, 512);
BM_TANH(1, 3, 128, 128);
......@@ -316,7 +316,7 @@ void SigmoidBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
}
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -359,8 +359,8 @@ void SigmoidBenchmark(
#define BM_SIGMOID(N, C, H, W) \
BM_SIGMOID_MACRO(N, C, H, W, float, CPU); \
BM_SIGMOID_MACRO(N, C, H, W, float, OPENCL); \
BM_SIGMOID_MACRO(N, C, H, W, half, OPENCL);
BM_SIGMOID_MACRO(N, C, H, W, float, GPU); \
BM_SIGMOID_MACRO(N, C, H, W, half, GPU);
BM_SIGMOID(1, 1, 512, 512);
BM_SIGMOID(1, 3, 128, 128);
......
......@@ -31,7 +31,7 @@ void TestSimpleRelu() {
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -68,7 +68,7 @@ void TestSimpleRelu() {
TEST_F(ActivationOpTest, CPUSimpleRelu) { TestSimpleRelu<DeviceType::CPU>(); }
TEST_F(ActivationOpTest, OPENCLSimpleRelu) {
TestSimpleRelu<DeviceType::OPENCL>();
TestSimpleRelu<DeviceType::GPU>();
}
namespace {
......@@ -79,7 +79,7 @@ void TestUnalignedSimpleRelu() {
// Add input data
net.AddInputFromArray<D, float>("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -117,7 +117,7 @@ TEST_F(ActivationOpTest, CPUUnalignedSimpleRelu) {
}
TEST_F(ActivationOpTest, OPENCLUnalignedSimpleRelu) {
TestUnalignedSimpleRelu<DeviceType::OPENCL>();
TestUnalignedSimpleRelu<DeviceType::GPU>();
}
......@@ -131,7 +131,7 @@ void TestSimpleRelux() {
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -170,7 +170,7 @@ void TestSimpleRelux() {
TEST_F(ActivationOpTest, CPUSimple) { TestSimpleRelux<DeviceType::CPU>(); }
TEST_F(ActivationOpTest, OPENCLSimple) {
TestSimpleRelux<DeviceType::OPENCL>();
TestSimpleRelux<DeviceType::GPU>();
}
namespace {
......@@ -183,7 +183,7 @@ void TestSimpleReluRelux() {
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -224,7 +224,7 @@ TEST_F(ActivationOpTest, CPUSimpleRelux) {
}
TEST_F(ActivationOpTest, OPENCLSimpleRelux) {
TestSimpleReluRelux<DeviceType::OPENCL>();
TestSimpleReluRelux<DeviceType::GPU>();
}
namespace {
......@@ -238,7 +238,7 @@ void TestSimplePrelu() {
{-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0});
net.AddInputFromArray<D, float>("Alpha", {2}, {2.0, 3.0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
......@@ -283,7 +283,7 @@ TEST_F(ActivationOpTest, CPUSimplePrelu) {
}
TEST_F(ActivationOpTest, OPENCLSimplePrelu) {
TestSimplePrelu<DeviceType::OPENCL>();
TestSimplePrelu<DeviceType::GPU>();
}
namespace {
......@@ -296,7 +296,7 @@ void TestSimpleTanh() {
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -336,7 +336,7 @@ void TestSimpleTanh() {
TEST_F(ActivationOpTest, CPUSimpleTanh) { TestSimpleTanh<DeviceType::CPU>(); }
TEST_F(ActivationOpTest, OPENCLSimpleTanh) {
TestSimpleTanh<DeviceType::OPENCL>();
TestSimpleTanh<DeviceType::GPU>();
}
namespace {
......@@ -349,7 +349,7 @@ void TestSimpleSigmoid() {
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -392,7 +392,7 @@ TEST_F(ActivationOpTest, CPUSimpleSigmoid) {
}
TEST_F(ActivationOpTest, OPENCLSimpleSigmoid) {
TestSimpleSigmoid<DeviceType::OPENCL>();
TestSimpleSigmoid<DeviceType::GPU>();
}
} // namespace test
......
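Note: the GPU branches above all follow the same round trip: convert the input buffer to an OpenCL image, run the op on DeviceType::GPU, then convert the image back before checking. A minimal self-contained sketch of that pattern follows; the header path, test name and the Activation op arguments are assumptions based on the hunks in this diff, not part of the commit.

// --- sketch only, not part of the commit -----------------------------------
#include "mace/ops/ops_test_util.h"   // assumed test utility header

namespace mace {
namespace ops {
namespace test {

TEST_F(ActivationOpTest, GPUReluSketch) {
  OpsTestNet net;
  // NHWC input, as used by the GPU (image) path in the tests above.
  net.AddInputFromArray<DeviceType::GPU, float>(
      "Input", {1, 2, 2, 1}, {-1, 1, -2, 2});
  // Move the buffer into an OpenCL image before running the op.
  BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
                                        kernels::BufferType::IN_OUT_CHANNEL);
  OpDefBuilder("Activation", "ReluSketch")
      .Input("InputImage")
      .Output("OutputImage")
      .AddStringArg("activation", "RELU")   // arg name mirrors the Conv2D hunks
      .Finalize(net.NewOperatorDef());
  net.RunOp(DeviceType::GPU);
  // Copy the image back to a host-visible buffer for comparison.
  ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
                                        kernels::BufferType::IN_OUT_CHANNEL);
  auto expected = CreateTensor<float>({1, 2, 2, 1}, {0, 1, 0, 2});
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}

}  // namespace test
}  // namespace ops
}  // namespace mace
// --- end sketch -------------------------------------------------------------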
......@@ -26,16 +26,16 @@ void Register_AddN(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::OPENCL, float>);
AddNOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
AddNOp<DeviceType::OPENCL, half>);
AddNOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -33,7 +33,7 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
net.AddRandomInput<D, float>(MakeString("Input", i).c_str(), {n, h, w, c});
}
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
for (int i = 0; i < inputs; ++i) {
BufferToImage<D, T>(&net, MakeString("Input", i).c_str(),
MakeString("InputImage", i).c_str(),
......@@ -82,8 +82,8 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
#define BM_ADDN(INPUTS, N, H, W, C) \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, OPENCL); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, OPENCL);
BM_ADDN_MACRO(INPUTS, N, H, W, C, float, GPU); \
BM_ADDN_MACRO(INPUTS, N, H, W, C, half, GPU);
BM_ADDN(2, 1, 256, 256, 32);
BM_ADDN(2, 1, 128, 128, 32);
......
......@@ -64,7 +64,7 @@ void SimpleAdd3() {
{-0.1582, 2, 3, 4, 5, 6});
const int input_num = 4;
if (D == DeviceType::OPENCL) {
if (D == DeviceType::GPU) {
// run on gpu
for (int i = 0; i < input_num; ++i) {
BufferToImage<D, half>(&net, MakeString("Input", i),
......@@ -105,7 +105,7 @@ void SimpleAdd3() {
} // namespace
TEST_F(AddnOpTest, CPUSimpleAdd3) { SimpleAdd3<DeviceType::CPU>(); }
TEST_F(AddnOpTest, GPUSimpleAdd3) { SimpleAdd3<DeviceType::OPENCL>(); }
TEST_F(AddnOpTest, GPUSimpleAdd3) { SimpleAdd3<DeviceType::GPU>(); }
namespace {
template <DeviceType D>
......@@ -166,7 +166,7 @@ void RandomTest() {
}
} // namespace
TEST_F(AddnOpTest, OPENCLRandom) { RandomTest<DeviceType::OPENCL>(); }
TEST_F(AddnOpTest, OPENCLRandom) { RandomTest<DeviceType::GPU>(); }
} // namespace test
} // namespace ops
......
......@@ -26,16 +26,16 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::OPENCL, float>);
BatchNormOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
BatchNormOp<DeviceType::OPENCL, half>);
BatchNormOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -32,7 +32,7 @@ void BatchNorm(
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, T>("Input", {batch, channels, height, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, T>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -52,7 +52,7 @@ void BatchNorm(
.AddFloatArg("epsilon", 1e-3)
.Output("Output")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Scale", "ScaleImage",
......@@ -107,8 +107,8 @@ void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W) \
BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, float, OPENCL); \
BM_BATCH_NORM_MACRO(N, C, H, W, half, OPENCL);
BM_BATCH_NORM_MACRO(N, C, H, W, float, GPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, half, GPU);
BM_BATCH_NORM(1, 1, 512, 512);
BM_BATCH_NORM(1, 3, 128, 128);
......
......@@ -49,7 +49,7 @@ void Simple() {
net.RunOp(D);
net.TransformDataFormat<D, float>("OutputNCHW", NCHW, "Output", NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Scale", "ScaleImage",
......@@ -90,7 +90,7 @@ void Simple() {
TEST_F(BatchNormOpTest, SimpleCPU) { Simple<DeviceType::CPU>(); }
TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::OPENCL>(); }
TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::GPU>(); }
TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
// generate random input
......@@ -103,12 +103,12 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Var", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -139,15 +139,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Mean", "MeanImage",
BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Var", "VarImage",
BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
......@@ -162,14 +162,14 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
// Tuning
setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
unsetenv("MACE_TUNING");
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
}
......@@ -186,12 +186,12 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Var", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -221,15 +221,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Mean", "MeanImage",
BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Var", "VarImage",
BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
......@@ -245,14 +245,14 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
// Tuning
setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
unsetenv("MACE_TUNING");
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2);
}
......@@ -269,12 +269,12 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Var", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -304,15 +304,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Mean", "MeanImage",
BufferToImage<DeviceType::GPU, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Var", "VarImage",
BufferToImage<DeviceType::GPU, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
......@@ -327,14 +327,14 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
// tuning
setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
unsetenv("MACE_TUNING");
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
}
......@@ -351,12 +351,12 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Var", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -386,15 +386,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Mean", "MeanImage",
BufferToImage<DeviceType::GPU, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Var", "VarImage",
BufferToImage<DeviceType::GPU, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest")
......@@ -410,14 +410,14 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
// tuning
setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
unsetenv("MACE_TUNING");
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2);
}
......
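The BatchNorm GPU tests above all repeat the same tuning sequence. Isolated for clarity below (same calls as in the hunks; the MACE_TUNING semantics are as suggested by their use here, not taken from documentation):

// --- sketch only, not part of the commit -----------------------------------
// The first run, with MACE_TUNING=1, lets the OpenCL runtime search kernel
// parameters; the second run then executes with the tuned configuration
// before the output is read back and compared.
setenv("MACE_TUNING", "1", 1);   // enable tuning for the next run
net.RunOp(DeviceType::GPU);      // tuning run
unsetenv("MACE_TUNING");
net.RunOp(DeviceType::GPU);      // actual run used for verification
net.Sync();                      // wait for the OpenCL queue to finish
// --- end sketch -------------------------------------------------------------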
......@@ -20,15 +20,15 @@ namespace ops {
void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, float>);
BatchToSpaceNDOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::OPENCL, half>);
BatchToSpaceNDOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -63,7 +63,7 @@ void BMBatchToSpace(
BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG) \
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, OPENCL);
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, GPU);
BM_BATCH_TO_SPACE(128, 8, 8, 128, 2);
BM_BATCH_TO_SPACE(4, 128, 128, 32, 2);
......
......@@ -26,16 +26,16 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::OPENCL, float>);
BiasAddOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
BiasAddOp<DeviceType::OPENCL, half>);
BiasAddOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -31,7 +31,7 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, T>("Input", {batch, channels, height, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, T>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -44,7 +44,7 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
.Input("Bias")
.Output("Output")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Bias", "BiasImage",
......@@ -84,8 +84,8 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
#define BM_BIAS_ADD(N, C, H, W) \
BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \
BM_BIAS_ADD_MACRO(N, C, H, W, float, OPENCL); \
BM_BIAS_ADD_MACRO(N, C, H, W, half, OPENCL);
BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \
BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU);
BM_BIAS_ADD(1, 1, 512, 512);
BM_BIAS_ADD(1, 3, 128, 128);
......
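After this change the BiasAdd benchmark registers one CPU variant and two GPU variants (float and half). Reconstructed from the hunk above, the resulting macro reads:

// --- reconstructed post-change form, for reference --------------------------
#define BM_BIAS_ADD(N, C, H, W)              \
  BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \
  BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \
  BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU);
// --- end ---------------------------------------------------------------------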
......@@ -47,7 +47,7 @@ void BiasAddSimple() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Bias", "BiasImage",
......@@ -80,7 +80,7 @@ void BiasAddSimple() {
TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple<DeviceType::CPU>(); }
TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) {
BiasAddSimple<DeviceType::OPENCL>();
BiasAddSimple<DeviceType::GPU>();
}
TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
......@@ -94,9 +94,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
net.AddRandomInput<DeviceType::GPU, float>("Bias", {channels}, true);
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -123,9 +123,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest")
......@@ -135,10 +135,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5);
}
......@@ -154,9 +154,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {channels}, true);
net.AddRandomInput<DeviceType::GPU, float>("Bias", {channels}, true);
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -182,9 +182,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest")
......@@ -194,10 +194,10 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5);
}
......
......@@ -19,16 +19,16 @@ namespace ops {
void Register_BufferToImage(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
BufferToImageOp<DeviceType::OPENCL, float>);
BufferToImageOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
BufferToImageOp<DeviceType::OPENCL, half>);
BufferToImageOp<DeviceType::GPU, half>);
}
} // namespace ops
......
......@@ -54,73 +54,73 @@ void TestBidirectionTransform(const int type,
} // namespace
TEST(BufferToImageTest, ArgSmall) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::ARGUMENT, {1});
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {1});
}
TEST(BufferToImageTest, ArgHalfSmall) {
TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
TestBidirectionTransform<DeviceType::GPU, half>(kernels::ARGUMENT, {11});
}
TEST(BufferToImageTest, ArgMedia) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::ARGUMENT, {11});
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {11});
}
TEST(BufferToImageTest, ArgLarge) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::ARGUMENT, {256});
TestBidirectionTransform<DeviceType::GPU, float>(kernels::ARGUMENT, {256});
}
TEST(BufferToImageTest, InputSmallSingleChannel) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::IN_OUT_CHANNEL,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL,
{1, 2, 3, 1});
}
TEST(BufferToImageTest, InputSmallMultipleChannel) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::IN_OUT_CHANNEL,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL,
{1, 2, 3, 3});
}
TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::IN_OUT_CHANNEL,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL,
{3, 2, 3, 3});
}
TEST(BufferToImageTest, InputMedia) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::IN_OUT_CHANNEL,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL,
{3, 13, 17, 128});
}
TEST(BufferToImageTest, InputLarge) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::IN_OUT_CHANNEL,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::IN_OUT_CHANNEL,
{3, 64, 64, 256});
}
TEST(BufferToImageTest, Filter1x1Small) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{1, 1, 3, 5});
}
TEST(BufferToImageTest, Filter1x1Media) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{1, 1, 13, 17});
}
TEST(BufferToImageTest, Filter1x1Large) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{1, 1, 128, 512});
}
TEST(BufferToImageTest, Filter3x3Small) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{3, 3, 3, 5});
}
TEST(BufferToImageTest, Filter3x3Meida) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{3, 3, 13, 17});
}
TEST(BufferToImageTest, Filter3x3Large) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::CONV2D_FILTER,
TestBidirectionTransform<DeviceType::GPU, float>(kernels::CONV2D_FILTER,
{3, 3, 128, 256});
}
......@@ -158,7 +158,7 @@ void TestDiffTypeBidirectionTransform(const int type,
} // namespace
TEST(BufferToImageTest, ArgFloatToHalfSmall) {
TestDiffTypeBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT,
TestDiffTypeBidirectionTransform<DeviceType::GPU, half>(kernels::ARGUMENT,
{11});
}
......@@ -203,7 +203,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) {
const unsigned char input_data[] = {
0xCD, 0x3C, 0x33, 0x40,
};
TestStringHalfBidirectionTransform<DeviceType::OPENCL, half>(
TestStringHalfBidirectionTransform<DeviceType::GPU, half>(
kernels::ARGUMENT, {2}, input_data);
}
......
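For reference, the raw bytes in ArgStringHalfToHalfSmall above are two little-endian IEEE-754 half values (roughly 1.2 and 2.1). A standalone decoding sketch, independent of MACE and covering only normal (non-subnormal) values:

// --- sketch only, not part of the commit -----------------------------------
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode a normal IEEE-754 half: 1 sign bit, 5 exponent bits (bias 15),
// 10 fraction bits. Subnormals, infinities and NaNs are not handled here.
static float HalfBitsToFloat(uint16_t bits) {
  const bool negative = (bits >> 15) & 0x1;
  const int exponent = (bits >> 10) & 0x1F;
  const int mantissa = bits & 0x3FF;
  const float value = std::ldexp(1.0f + mantissa / 1024.0f, exponent - 15);
  return negative ? -value : value;
}

int main() {
  // {0xCD, 0x3C} -> 0x3CCD and {0x33, 0x40} -> 0x4033 on a little-endian target.
  std::printf("%f\n", HalfBitsToFloat(0x3CCD));  // ~1.2002
  std::printf("%f\n", HalfBitsToFloat(0x4033));  // ~2.0996
  return 0;
}
// --- end sketch -------------------------------------------------------------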
......@@ -26,16 +26,16 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
ChannelShuffleOp<DeviceType::OPENCL, float>);
ChannelShuffleOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
ChannelShuffleOp<DeviceType::OPENCL, half>);
ChannelShuffleOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -35,7 +35,7 @@ class ChannelShuffleOp : public Operator<D, T> {
const Tensor *input = this->Input(INPUT);
Tensor *output = this->Output(OUTPUT);
int channels;
if (D == OPENCL) {
if (D == GPU) {
channels = input->dim(3);
} else if (D == CPU) {
channels = input->dim(1);
......
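The ChannelShuffleOp hunk above encodes the layout convention this commit relies on: the GPU (image) path sees NHWC tensors, so channels are dim(3), while the CPU path sees NCHW, so channels are dim(1). The same rule as a standalone sketch, independent of the MACE headers:

// --- sketch only, not part of the commit -----------------------------------
#include <cstdint>
#include <stdexcept>
#include <vector>

enum class Layout { NCHW, NHWC };

// Mirror of the D == CPU (NCHW) / D == GPU (NHWC) branches above.
int64_t ChannelCount(const std::vector<int64_t> &shape, Layout layout) {
  if (shape.size() != 4) throw std::invalid_argument("expected a 4-D shape");
  return layout == Layout::NCHW ? shape[1] : shape[3];
}
// --- end sketch -------------------------------------------------------------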
......@@ -31,7 +31,7 @@ void ChannelShuffle(
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, float>("Input", {batch, height, channels, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -42,7 +42,7 @@ void ChannelShuffle(
.Input("Input")
.Output("Output")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -82,8 +82,8 @@ void ChannelShuffle(
#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, OPENCL); \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, OPENCL);
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, GPU); \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, GPU);
BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8);
BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8);
......
......@@ -60,11 +60,11 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
OpsTestNet net;
// Add input data
net.AddInputFromArray<DeviceType::OPENCL, float>(
net.AddInputFromArray<DeviceType::GPU, float>(
"Input", {1, 1, 2, 16},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31});
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
......@@ -74,10 +74,10 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
// Transfer output
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "Output",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
// Check
......
......@@ -26,16 +26,16 @@ void Register_Concat(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
ConcatOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
ConcatOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -74,12 +74,12 @@ void OpenclConcatHelper(int iters,
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
net.AddRandomInput<DeviceType::GPU, float>("Input0", shape0);
net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1);
BufferToImage<DeviceType::OPENCL, T>(&net, "Input0", "InputImage0",
BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(&net, "Input1", "InputImage1",
BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0")
......@@ -91,7 +91,7 @@ void OpenclConcatHelper(int iters,
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
}
const int64_t tot =
......@@ -101,7 +101,7 @@ void OpenclConcatHelper(int iters,
testing::BytesProcessed(tot * sizeof(T));
mace::testing::StartTiming();
while (iters--) {
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
}
}
} // namespace
......
......@@ -171,9 +171,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
concat_axis_size += shapes[i][axis];
GenerateRandomRealTypeData(shapes[i], &inputs[i]);
input_ptrs[i] = inputs[i].data();
net.AddInputFromArray<DeviceType::OPENCL, float>(input_name,
net.AddInputFromArray<DeviceType::GPU, float>(input_name,
shapes[i], inputs[i]);
BufferToImage<DeviceType::OPENCL, T>(&net, input_name, image_name,
BufferToImage<DeviceType::GPU, T>(&net, input_name, image_name,
kernels::BufferType::IN_OUT_CHANNEL);
}
......@@ -188,9 +188,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "Output",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
// Check
......
......@@ -26,16 +26,16 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, float>);
Conv2dOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
Conv2dOp<DeviceType::OPENCL, half>);
Conv2dOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -16,6 +16,7 @@
#define MACE_OPS_CONV_2D_H_
#include <memory>
#include <string>
#include "mace/core/operator.h"
#include "mace/kernels/conv_2d.h"
......@@ -33,8 +34,10 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
this->padding_type_,
this->paddings_,
this->dilations_.data(),
kernels::ActivationType::NOOP,
0.0f,
kernels::StringToActivationType(
OperatorBase::GetSingleArgument<std::string>("activation",
"NOOP")),
OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
static_cast<bool>(OperatorBase::GetSingleArgument<int>(
"is_filter_transformed", false)),
ws->GetScratchBuffer(D)) {}
......
......@@ -47,7 +47,7 @@ void Conv2d(int iters,
{output_channels, channels, kernel_h,
kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
net.AddRandomInput<D, float>("Filter",
{kernel_h, kernel_w, output_channels,
......@@ -68,7 +68,7 @@ void Conv2d(int iters,
.AddIntsArg("dilations", {dilation, dilation})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -138,8 +138,8 @@ void Conv2d(int iters,
#define BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, OPENCL); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, OPENCL);
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU);
......
......@@ -63,7 +63,7 @@ void TestNHWCSimple3x3VALID() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -134,7 +134,7 @@ void TestNHWCSimple3x3SAME() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -176,8 +176,8 @@ TEST_F(Conv2dOpTest, CPUSimple) {
}
TEST_F(Conv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3VALID<DeviceType::OPENCL, float>();
TestNHWCSimple3x3SAME<DeviceType::OPENCL, float>();
TestNHWCSimple3x3VALID<DeviceType::GPU, float>();
TestNHWCSimple3x3SAME<DeviceType::GPU, float>();
}
namespace {
......@@ -219,7 +219,7 @@ void TestNHWCSimple3x3WithoutBias() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -255,7 +255,7 @@ TEST_F(Conv2dOpTest, CPUWithoutBias) {
}
TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias<DeviceType::OPENCL, float>();
TestNHWCSimple3x3WithoutBias<DeviceType::GPU, float>();
}
namespace {
......@@ -301,7 +301,7 @@ void TestNHWCCombined3x3() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -341,9 +341,164 @@ TEST_F(Conv2dOpTest, CPUStride2) {
}
TEST_F(Conv2dOpTest, OPENCLStride2) {
TestNHWCCombined3x3<DeviceType::OPENCL, float>();
TestNHWCCombined3x3<DeviceType::GPU, float>();
}
namespace {
template<DeviceType D, typename T>
void TestFusedNHWCSimple3x3VALID() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>(
"Input", {1, 3, 3, 2},
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1});
net.AddInputFromArray<D, float>(
"Filter", {3, 3, 1, 2},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
net.AddInputFromArray<D, float>("Bias", {1}, {-0.1f});
if (D == DeviceType::CPU) {
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
"InputNCHW",
NCHW);
net.TransformDataFormat<DeviceType::CPU, float>("Filter",
HWOI,
"FilterOIHW",
OIHW);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputNCHW")
.Input("FilterOIHW")
.Input("Bias")
.Output("OutputNCHW")
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddStringArg("activation", "RELU")
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW",
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage")
.Input("FilterImage")
.Input("BiasImage")
.Output("OutputImage")
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddStringArg("activation", "RELU")
.Finalize(net.NewOperatorDef());
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
} else {
MACE_NOT_IMPLEMENTED;
}
auto expected = CreateTensor<float>({1, 1, 1, 1}, {0.0f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"));
}
template<DeviceType D, typename T>
void TestFusedNHWCSimple3x3WithoutBias() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>(
"Input", {1, 3, 3, 2},
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1});
net.AddInputFromArray<D, float>(
"Filter", {3, 3, 1, 2},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::CPU) {
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
"InputNCHW",
NCHW);
net.TransformDataFormat<DeviceType::CPU, float>("Filter",
HWOI,
"FilterOIHW",
OIHW);
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputNCHW")
.Input("FilterOIHW")
.Output("OutputNCHW")
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddStringArg("activation", "RELU")
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW",
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER);
OpDefBuilder("Conv2D", "Conv2DTest")
.Input("InputImage")
.Input("FilterImage")
.Output("OutputImage")
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddStringArg("activation", "RELU")
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
// Transfer output
ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
} else {
MACE_NOT_IMPLEMENTED;
}
// Check
auto expected = CreateTensor<float>({1, 1, 1, 1}, {0.0f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"));
}
} // namespace
TEST_F(Conv2dOpTest, FusedCPUSimple) {
TestFusedNHWCSimple3x3VALID<DeviceType::CPU, float>();
TestFusedNHWCSimple3x3WithoutBias<DeviceType::CPU, float>();
}
TEST_F(Conv2dOpTest, FusedOPENCLSimple) {
TestFusedNHWCSimple3x3VALID<DeviceType::GPU, float>();
TestFusedNHWCSimple3x3WithoutBias<DeviceType::GPU, float>();
}
namespace {
template<DeviceType D>
void TestConv1x1() {
......@@ -389,7 +544,7 @@ void TestConv1x1() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Filter", "FilterImage",
......@@ -431,7 +586,7 @@ void TestConv1x1() {
TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::OPENCL>(); }
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::GPU>(); }
namespace {
template<DeviceType D, typename T>
......@@ -524,18 +679,18 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
} // namespace
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32}, 1);
TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32}, 2);
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1);
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 2);
}
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::OPENCL, float>({17, 113, 5, 7}, 1);
TestComplexConvNxNS12<DeviceType::OPENCL, float>({17, 113, 5, 7}, 2);
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1);
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 2);
}
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) {
TestComplexConvNxNS12<DeviceType::OPENCL, float>({31, 113, 13, 17}, 3);
TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 32, 13, 17}, 4);
TestComplexConvNxNS12<DeviceType::GPU, float>({31, 113, 13, 17}, 3);
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 32, 13, 17}, 4);
}
namespace {
......@@ -644,52 +799,52 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
} // namespace
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32}, {1, 1, 32, 64},
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {1, 1, 32, 64},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32}, {3, 3, 32, 64},
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32}, {15, 1, 256, 2},
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {15, 1, 256, 2},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32}, {1, 15, 256, 2},
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {1, 15, 256, 2},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x75S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32}, {7, 7, 3, 64},
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {7, 7, 3, 64},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({107, 113}, {1, 1, 5, 7},
TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {1, 1, 5, 7},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({107, 113}, {3, 3, 5, 7},
TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7},
{1, 1});
}
TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({64, 64}, {5, 5, 16, 16},
TestHalfComplexConvNxNS12<DeviceType::GPU>({64, 64}, {5, 5, 16, 16},
{2, 2});
}
TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({64, 64}, {7, 7, 16, 16},
TestHalfComplexConvNxNS12<DeviceType::GPU>({64, 64}, {7, 7, 16, 16},
{2, 2});
}
TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({63, 67}, {7, 7, 16, 16},
TestHalfComplexConvNxNS12<DeviceType::GPU>({63, 67}, {7, 7, 16, 16},
{4, 4});
}
......@@ -787,15 +942,115 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
} // namespace
TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) {
TestDilationConvNxN<DeviceType::OPENCL, float>({32, 32, 32, 64}, 2);
TestDilationConvNxN<DeviceType::GPU, float>({32, 32, 32, 64}, 2);
}
TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) {
TestDilationConvNxN<DeviceType::OPENCL, float>({128, 128, 16, 16}, 4);
TestDilationConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, 4);
}
TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) {
TestDilationConvNxN<DeviceType::OPENCL, float>({107, 113, 5, 7}, 4);
TestDilationConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, 4);
}
namespace {
template<DeviceType D>
void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
const std::vector<index_t> &filter_shape,
const std::vector<int> &dilations) {
testing::internal::LogToStderr();
auto func = [&](int stride_h, int stride_w, Padding type) {
srand(time(NULL));
// generate random input
index_t batch = 1;
index_t height = image_shape[0];
index_t width = image_shape[1];
index_t kernel_h = filter_shape[0];
index_t kernel_w = filter_shape[1];
index_t output_channels = filter_shape[2];
index_t input_channels = filter_shape[3];
OpsTestNet net;
// Add input data
net.AddRandomInput<D, float>("Input",
{batch, height, width, input_channels});
net.AddRandomInput<D, float>(
"Filter", {kernel_h, kernel_w, output_channels, input_channels});
net.AddRandomInput<D, float>("Bias", {output_channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
"InputNCHW",
NCHW);
net.TransformDataFormat<DeviceType::CPU, float>("Filter",
HWOI,
"FilterOIHW",
OIHW);
// Construct graph
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputNCHW")
.Input("FilterOIHW")
.Input("Bias")
.Output("OutputNCHW")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", dilations)
.Finalize(net.NewOperatorDef());
// run on cpu
net.RunOp();
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW",
NCHW,
"Output",
NHWC);
// Check
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// run on gpu
BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage")
.Input("FilterImage")
.Input("BiasImage")
.Output("OutputImage")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", dilations)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<half>::value))
.Finalize(net.NewOperatorDef());
// Run on device
net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-2, 1e-1);
};
func(1, 1, VALID);
func(1, 1, SAME);
}
} // namespace
TEST_F(Conv2dOpTest, OPENCLHalf7X7AtrousConvD2) {
TestGeneralHalfAtrousConv<DeviceType::GPU>({32, 32}, {7, 7, 16, 3},
{2, 2});
}
TEST_F(Conv2dOpTest, OPENCLHalf15X15AtrousConvD4) {
TestGeneralHalfAtrousConv<DeviceType::GPU>({63, 71}, {15, 15, 16, 16},
{2, 2});
}
namespace {
......@@ -887,16 +1142,16 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
} // namespace
TEST_F(Conv2dOpTest, OPENCLAlignedPad1) {
TestArbitraryPadConvNxN<DeviceType::OPENCL, float>({32, 32, 32, 64}, {1, 1});
TestArbitraryPadConvNxN<DeviceType::GPU, float>({32, 32, 32, 64}, {1, 1});
}
TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
TestArbitraryPadConvNxN<DeviceType::OPENCL, float>({128, 128, 16, 16},
TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16},
{2, 2});
}
TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) {
TestArbitraryPadConvNxN<DeviceType::OPENCL, float>({107, 113, 5, 7}, {4, 4});
TestArbitraryPadConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, {4, 4});
}
} // namespace test
......
......@@ -32,7 +32,7 @@ TEST(CoreTest, INIT_MODE) {
.Finalize(&op_defs[op_defs.size() - 1]);
Tensor *input =
ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL),
ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::GPU),
DataTypeToEnum<float>::v());
input->Resize({1, 3, 3, 3});
{
......@@ -54,13 +54,13 @@ TEST(CoreTest, INIT_MODE) {
}
std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());
auto net =
CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT);
CreateNet(op_registry, net_def, &ws, DeviceType::GPU, NetMode::INIT);
net->Run();
EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr);
EXPECT_TRUE(ws.GetTensor("Output") == nullptr);
net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL);
net = CreateNet(op_registry, net_def, &ws, DeviceType::GPU);
net->Run();
EXPECT_TRUE(ws.GetTensor("Output") != nullptr);
......
......@@ -26,16 +26,16 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
DepthToSpaceOp<DeviceType::OPENCL, float>);
DepthToSpaceOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
DepthToSpaceOp<DeviceType::OPENCL, half>);
DepthToSpaceOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -40,7 +40,7 @@ class DepthToSpaceOp : public Operator<D, T> {
int input_depth;
if (D == CPU) {
input_depth = input->dim(1);
} else if (D == OPENCL) {
} else if (D == GPU) {
input_depth = input->dim(3);
} else {
MACE_NOT_IMPLEMENTED;
......
......@@ -31,7 +31,7 @@ void DepthToSpace(
// Add input data
if (D == DeviceType::CPU) {
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
} else {
MACE_NOT_IMPLEMENTED;
......@@ -42,7 +42,7 @@ void DepthToSpace(
.Input("Input")
.Output("Output")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
......@@ -82,8 +82,8 @@ void DepthToSpace(
#define BM_DEPTH_TO_SPACE(N, C, H, W, G) \
BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU); \
BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, OPENCL); \
BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, OPENCL);
BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, GPU); \
BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, GPU);
BM_DEPTH_TO_SPACE(1, 64, 64, 64, 4);
BM_DEPTH_TO_SPACE(1, 64, 128, 128, 4);
......
......@@ -65,8 +65,8 @@ void RunDepthToSpace(const bool d2s,
}
if (D == DeviceType::OPENCL) {
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "Output",
if (D == DeviceType::GPU) {
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL);
}
auto expected = CreateTensor<float>(expected_shape, expected_data);
......@@ -88,7 +88,7 @@ TEST_F(SpaceToDepthOpTest, Input2x4x4_B2_CPU) {
}
TEST_F(SpaceToDepthOpTest, Input2x4x4_B2_OPENCL) {
RunDepthToSpace<DeviceType::OPENCL>(false, {1, 2, 4, 4},
RunDepthToSpace<DeviceType::GPU>(false, {1, 2, 4, 4},
{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23,
8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
2,
......@@ -110,7 +110,7 @@ TEST_F(SpaceToDepthOpTest, Input2x2x4_B2_CPU) {
}
TEST_F(SpaceToDepthOpTest, Input4x4x1_B2_OPENCL) {
RunDepthToSpace<DeviceType::OPENCL>(false, {1, 2, 2, 4},
RunDepthToSpace<DeviceType::GPU>(false, {1, 2, 2, 4},
{1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16},
2,
......@@ -132,7 +132,7 @@ TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_CPU) {
}
TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_OPENCL) {
RunDepthToSpace<DeviceType::OPENCL>(true, {1, 1, 2, 16},
RunDepthToSpace<DeviceType::GPU>(true, {1, 1, 2, 16},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
2,
......@@ -152,7 +152,7 @@ TEST_F(DepthToSpaceOpTest, Input1x1x16_B2_CPU) {
}
TEST_F(DepthToSpaceOpTest, Input1x1x16_B2_OPENCL) {
RunDepthToSpace<DeviceType::OPENCL>(true, {1, 1, 1, 16},
RunDepthToSpace<DeviceType::GPU>(true, {1, 1, 1, 16},
{1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16},
2,
......@@ -165,7 +165,7 @@ TEST_F(DepthToSpaceOpTest, Input1x1x16_B2_OPENCL) {
TEST_F(DepthToSpaceOpTest, InputLarger_B2_OPENCL) {
const std::vector<float > in = std::vector<float >(192 * 192 *128, 1.0);
RunDepthToSpace<DeviceType::OPENCL>(true, {1, 192, 192, 128},
RunDepthToSpace<DeviceType::GPU>(true, {1, 192, 192, 128},
in,
2,
{1, 384, 384, 32},
......@@ -234,19 +234,19 @@ void RandomTest(const bool d2s, const int block_size,
} // namespace
TEST_F(DepthToSpaceOpTest, OPENCLRandomFloat) {
RandomTest<DeviceType::OPENCL, float>(true, 2, {1, 192, 192, 128});
RandomTest<DeviceType::GPU, float>(true, 2, {1, 192, 192, 128});
}
TEST_F(DepthToSpaceOpTest, OPENCLRandomHalf) {
RandomTest<DeviceType::OPENCL, half>(true, 2, {1, 192, 192, 128});
RandomTest<DeviceType::GPU, half>(true, 2, {1, 192, 192, 128});
}
TEST_F(SpaceToDepthOpTest, OPENCLRandomFloat) {
RandomTest<DeviceType::OPENCL, float>(false, 2, {1, 384, 384, 32});
RandomTest<DeviceType::GPU, float>(false, 2, {1, 384, 384, 32});
}
TEST_F(SpaceToDepthOpTest, OPENCLRandomHalf) {
RandomTest<DeviceType::OPENCL, half>(false, 2, {1, 384, 384, 32});
RandomTest<DeviceType::GPU, half>(false, 2, {1, 384, 384, 32});
}
} // namespace test
......
......@@ -26,16 +26,16 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, float>);
DepthwiseConv2dOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::OPENCL, half>);
DepthwiseConv2dOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -46,7 +46,7 @@ void DepthwiseConv2d(int iters,
net.AddRandomInput<D, float>(
"Filter", {multiplier, input_channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {input_channels * multiplier});
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input",
{batch, height, width, input_channels});
net.AddRandomInput<D, float>(
......@@ -67,7 +67,7 @@ void DepthwiseConv2d(int iters,
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -134,8 +134,8 @@ void DepthwiseConv2d(int iters,
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, OPENCL); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, OPENCL);
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU);
BM_DEPTHWISE_CONV_2D(1, 32, 112, 112, 3, 3, 1, SAME, 1);
BM_DEPTHWISE_CONV_2D(1, 32, 56, 56, 3, 3, 2, VALID, 1);
......
......@@ -59,7 +59,7 @@ void SimpleValidTest() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -105,11 +105,11 @@ TEST_F(DepthwiseConv2dOpTest, SimpleCPU) {
}
TEST_F(DepthwiseConv2dOpTest, SimpleOpenCL) {
SimpleValidTest<DeviceType::OPENCL, float>();
SimpleValidTest<DeviceType::GPU, float>();
}
TEST_F(DepthwiseConv2dOpTest, SimpleOpenCLHalf) {
SimpleValidTest<DeviceType::OPENCL, half>();
SimpleValidTest<DeviceType::GPU, half>();
}
namespace {
......@@ -184,7 +184,7 @@ void ComplexValidTest() {
NCHW,
"Output",
NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage",
......@@ -245,11 +245,11 @@ TEST_F(DepthwiseConv2dOpTest, ComplexCPU) {
}
TEST_F(DepthwiseConv2dOpTest, ComplexOpenCL) {
ComplexValidTest<DeviceType::OPENCL, float>();
ComplexValidTest<DeviceType::GPU, float>();
}
TEST_F(DepthwiseConv2dOpTest, ComplexOpenCLHalf) {
ComplexValidTest<DeviceType::OPENCL, half>();
ComplexValidTest<DeviceType::GPU, half>();
}
namespace {
......@@ -267,12 +267,12 @@ void TestNxNS12(const index_t height, const index_t width) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input",
net.AddRandomInput<DeviceType::GPU, float>("Input",
{batch, height, width,
input_channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Filter", {kernel_h, kernel_w, input_channels, multiplier});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias",
net.AddRandomInput<DeviceType::GPU, float>("Bias",
{multiplier
* input_channels});
......@@ -307,11 +307,11 @@ void TestNxNS12(const index_t height, const index_t width) {
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(&net, "Filter", "FilterImage",
BufferToImage<DeviceType::GPU, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER);
BufferToImage<DeviceType::OPENCL, T>(&net, "Bias", "BiasImage",
BufferToImage<DeviceType::GPU, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage")
......@@ -324,10 +324,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
// Transfer output
ImageToBuffer<DeviceType::OPENCL, float>(&net,
ImageToBuffer<DeviceType::GPU, float>(&net,
"OutputImage",
"DeviceOutput",
kernels::BufferType::IN_OUT_CHANNEL);
......
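The depthwise-conv test hunks above all follow the same GPU validation flow: random NHWC inputs are added, a CPU reference is computed, the buffers are converted to OpenCL images, the op is run with DeviceType::GPU, and the output image is transferred back for comparison against the reference. The condensed sketch below shows only the GPU half of that flow; it is not part of the displayed diff. The helper name RunDepthwiseOnGpuSketch, the header path, the shapes, and the strides/padding arguments are assumptions, since they are not fully visible in the hunks above.

#include "mace/ops/ops_test_util.h"  // assumed test-utility header

namespace mace {
namespace ops {
namespace test {

// Sketch only: run DepthwiseConv2d on the GPU via image buffers.
// Tensor names and buffer types mirror the hunks above; shapes and the
// strides/padding argument names are illustrative assumptions.
template <typename T>
void RunDepthwiseOnGpuSketch() {
  OpsTestNet net;
  net.AddRandomInput<DeviceType::GPU, float>("Input", {1, 64, 64, 32});
  net.AddRandomInput<DeviceType::GPU, float>("Filter", {3, 3, 32, 1});
  net.AddRandomInput<DeviceType::GPU, float>("Bias", {32});

  // Move buffers into OpenCL images before building the op.
  BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage",
                                    kernels::BufferType::IN_OUT_CHANNEL);
  BufferToImage<DeviceType::GPU, T>(&net, "Filter", "FilterImage",
                                    kernels::BufferType::DW_CONV2D_FILTER);
  BufferToImage<DeviceType::GPU, T>(&net, "Bias", "BiasImage",
                                    kernels::BufferType::ARGUMENT);

  OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
      .Input("InputImage")
      .Input("FilterImage")
      .Input("BiasImage")
      .Output("OutputImage")
      .AddIntsArg("strides", {1, 1})       // assumed argument name
      .AddIntArg("padding", Padding::SAME) // SAME/VALID as in the macros above
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
      .Finalize(net.NewOperatorDef());

  // Run on the GPU and transfer the output image back to a buffer so it can
  // be compared against a CPU reference with ExpectTensorNear.
  net.RunOp(DeviceType::GPU);
  ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "DeviceOutput",
                                        kernels::BufferType::IN_OUT_CHANNEL);
}

}  // namespace test
}  // namespace ops
}  // namespace mace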
......@@ -26,16 +26,16 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
EltwiseOp<DeviceType::OPENCL, float>);
EltwiseOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
EltwiseOp<DeviceType::OPENCL, half>);
EltwiseOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -34,11 +34,11 @@ void EltwiseBenchmark(
net.AddRandomInput<D, T>("Input0", {n, h, w, c});
net.AddRandomInput<D, T>("Input1", {n, h, w, c});
if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(&net, "Input0", "InputImg0",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL);
if (D == DeviceType::GPU) {
BufferToImage<D, half>(&net, "Input0", "InputImg0",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg0")
.Input("InputImg1")
......@@ -90,8 +90,8 @@ void EltwiseBenchmark(
#define BM_ELTWISE(ELT_TYPE, N, H, W, C) \
BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \
BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, OPENCL); \
BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, OPENCL);
BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, GPU); \
BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU);
BM_ELTWISE(2, 1, 128, 128, 32);
BM_ELTWISE(2, 1, 240, 240, 256);
......
......@@ -183,49 +183,49 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) {
}
TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) {
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::SUM,
{1, 1, 1, 1}, {1}, 1,
{2});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::SUB,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{0, 1, 2, 3, 4, 5});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::PROD,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
2,
{2, 4, 6, 8, 10, 12});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::DIV,
{1, 1, 2, 3},
{2, 4, 6, 8, 10, 12},
2,
{1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::MIN,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{1, 1, 1, 1, 1, 1});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::MAX,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
3,
{3, 3, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::NEG,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
3,
{-1, -2, -3, -4, -5, -6});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::ABS,
{1, 1, 2, 3},
{-1, -2, -3, -4, -5, -6},
3,
{1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::OPENCL, float>(kernels::EltwiseType::SQR_DIFF,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{0, 1, 4, 9, 16, 25});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::SUM,
{1, 1, 1, 1}, {1}, 1,
{2});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::SUB,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{0, 1, 2, 3, 4, 5});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::PROD,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
2,
{2, 4, 6, 8, 10, 12});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::DIV,
{1, 1, 2, 3},
{2, 4, 6, 8, 10, 12},
2,
{1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::MIN,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{1, 1, 1, 1, 1, 1});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::MAX,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
3,
{3, 3, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::NEG,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
3,
{-1, -2, -3, -4, -5, -6});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::ABS,
{1, 1, 2, 3},
{-1, -2, -3, -4, -5, -6},
3,
{1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::GPU, float>(kernels::EltwiseType::SQR_DIFF,
{1, 1, 2, 3},
{1, 2, 3, 4, 5, 6},
1,
{0, 1, 4, 9, 16, 25});
}
TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
......@@ -277,49 +277,49 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
}
TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUM,
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 1, 3}, {1, 2, 3},
{2, 4, 6, 5, 7, 9});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUB,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUB,
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{0, 0, 0, 0, 0, -5, -5, -5, -5, -5});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::PROD,
{1, 1, 1, 3}, {1, 2, 3},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
{1, 4, 9, 4, 10, 18});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {1, 1, 1, 1, 5},
{1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::DIV,
{1, 1, 1, 5}, {1, 1, 1, 2, 4},
{1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::MIN,
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::MAX,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SQR_DIFF,
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
......@@ -369,43 +369,43 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) {
25});
}
TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUM,
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{2, 4, 6, 8, 10, 12});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUM,
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{0.2, 0.4, 0.6, 0.8, 1, 1.2},
{0.1, 0.1});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SUB,
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{1, 1, 1, 5}, {1, 2, 3, 4, 5},
{0, 0, 0, 0, 0});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::PROD,
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
{1, 4, 9, 16, 25, 36});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::DIV,
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::MIN,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::MAX,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
SimpleTensorEltwise<DeviceType::OPENCL, float>(
SimpleTensorEltwise<DeviceType::GPU, float>(
kernels::EltwiseType::SQR_DIFF,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
......@@ -420,7 +420,7 @@ void RandomTensorScalar(const kernels::EltwiseType type,
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", shape, true, true);
net.AddRandomInput<DeviceType::GPU, float>("Input", shape, true, true);
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -441,7 +441,7 @@ void RandomTensorScalar(const kernels::EltwiseType type,
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImg",
BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImg",
kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg")
......@@ -452,15 +452,15 @@ void RandomTensorScalar(const kernels::EltwiseType type,
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImg", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImg", "GPUOutput",
kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DT_FLOAT) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5);
ExpectTensorNear<float>(expected, *net.GetOutput("GPUOutput"), 1e-5);
} else {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2,
ExpectTensorNear<float>(expected, *net.GetOutput("GPUOutput"), 1e-2,
1e-2);
}
}
......@@ -474,8 +474,8 @@ void RandomTensorEltwise(const kernels::EltwiseType type,
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0, true, true);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1, true, true);
net.AddRandomInput<DeviceType::GPU, float>("Input0", shape0, true, true);
net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1, true, true);
net.TransformDataFormat<DeviceType::CPU, float>("Input0", NHWC,
"TInput0", NCHW);
......@@ -496,9 +496,9 @@ void RandomTensorEltwise(const kernels::EltwiseType type,
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
BufferToImage<DeviceType::OPENCL, T>(&net, "Input0", "InputImg0",
BufferToImage<DeviceType::GPU, T>(&net, "Input0", "InputImg0",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(&net, "Input1", "InputImg1",
BufferToImage<DeviceType::GPU, T>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg0")
......@@ -510,15 +510,15 @@ void RandomTensorEltwise(const kernels::EltwiseType type,
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImg", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImg", "GPUOutput",
kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DT_FLOAT) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5);
ExpectTensorNear<float>(expected, *net.GetOutput("GPUOutput"), 1e-5);
} else {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2,
ExpectTensorNear<float>(expected, *net.GetOutput("GPUOutput"), 1e-2,
1e-2);
}
}
......@@ -609,19 +609,19 @@ TEST_F(EltwiseOpTest, RandomTensorTensorFloat) {
TEST_F(EltwiseOpTest, RandomTensorTensorHalf) {
RandomTensorEltwise<half>(kernels::EltwiseType::SUM,
{1, 32, 32, 16}, {1, 32, 32, 16});
{1, 32, 32, 16}, {1, 32, 32, 16});
RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
{3, 32, 32, 16}, {3, 32, 32, 16});
{3, 32, 32, 16}, {3, 32, 32, 16});
RandomTensorEltwise<half>(kernels::EltwiseType::PROD,
{1, 31, 37, 17}, {1, 31, 37, 17});
{1, 31, 37, 17}, {1, 31, 37, 17});
RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
{5, 31, 37, 17}, {5, 31, 37, 17});
{5, 31, 37, 17}, {5, 31, 37, 17});
RandomTensorEltwise<half>(kernels::EltwiseType::MIN,
{1, 32, 32, 16}, {1, 32, 32, 16});
{1, 32, 32, 16}, {1, 32, 32, 16});
RandomTensorEltwise<half>(kernels::EltwiseType::MAX,
{3, 31, 37, 17}, {3, 31, 37, 17});
{3, 31, 37, 17}, {3, 31, 37, 17});
RandomTensorEltwise<half>(kernels::EltwiseType::SQR_DIFF,
{3, 31, 37, 17}, {3, 31, 37, 17});
{3, 31, 37, 17}, {3, 31, 37, 17});
}
......
......@@ -26,16 +26,16 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
FoldedBatchNormOp<DeviceType::OPENCL, float>);
FoldedBatchNormOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
FoldedBatchNormOp<DeviceType::OPENCL, half>);
FoldedBatchNormOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -60,7 +60,7 @@ void Simple() {
// Run
net.RunOp(D);
net.TransformDataFormat<D, float>("OutputNCHW", NCHW, "Output", NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Scale", "ScaleImage",
......@@ -94,7 +94,7 @@ void Simple() {
TEST_F(FoldedBatchNormOpTest, SimpleCPU) { Simple<DeviceType::CPU>(); }
TEST_F(FoldedBatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::OPENCL>(); }
TEST_F(FoldedBatchNormOpTest, SimpleOPENCL) { Simple<DeviceType::GPU>(); }
TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
// generate random input
......@@ -108,10 +108,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -138,11 +138,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
......@@ -153,10 +153,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
}
......@@ -173,10 +173,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -203,11 +203,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
......@@ -219,10 +219,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2);
}
......@@ -239,10 +239,10 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -269,11 +269,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
......@@ -284,9 +284,9 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
}
......@@ -303,10 +303,10 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input",
NHWC,
......@@ -333,11 +333,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
BufferToImage<DeviceType::GPU, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
BufferToImage<DeviceType::GPU, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
......@@ -349,9 +349,9 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2);
}
......
......@@ -26,16 +26,16 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<float>("T")
.Build(),
FullyConnectedOp<DeviceType::OPENCL, float>);
FullyConnectedOp<DeviceType::GPU, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC")
.Device(DeviceType::OPENCL)
.Device(DeviceType::GPU)
.TypeConstraint<half>("T")
.Build(),
FullyConnectedOp<DeviceType::OPENCL, half>);
FullyConnectedOp<DeviceType::GPU, half>);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -43,7 +43,7 @@ void FCBenchmark(
.Input("Bias")
.Output("Output")
.Finalize(net.NewOperatorDef());
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
kernels::BufferType weight_type = kernels::BufferType::WEIGHT_WIDTH;
BufferToImage<D, T>(&net, "Weight", "WeightImage",
weight_type);
......@@ -93,8 +93,8 @@ void FCBenchmark(
#define BM_FC(N, H, W, C, OC) \
BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
BM_FC_MACRO(N, H, W, C, OC, float, OPENCL); \
BM_FC_MACRO(N, H, W, C, OC, half, OPENCL);
BM_FC_MACRO(N, H, W, C, OC, float, GPU); \
BM_FC_MACRO(N, H, W, C, OC, half, GPU);
BM_FC(1, 16, 16, 32, 32);
BM_FC(1, 8, 8, 32, 1000);
......
......@@ -51,7 +51,7 @@ void Simple(const std::vector<index_t> &input_shape,
// Run
net.RunOp(D);
net.TransformDataFormat<D, float>("OutputNCHW", NCHW, "Output", NHWC);
} else if (D == DeviceType::OPENCL) {
} else if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Weight", "WeightImage",
......@@ -104,14 +104,14 @@ TEST_F(FullyConnectedOpTest, SimpleCPUWithBatch) {
}
TEST_F(FullyConnectedOpTest, SimpleOPENCL) {
Simple<DeviceType::OPENCL>({1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 8},
Simple<DeviceType::GPU>({1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 8},
{1, 2, 3, 4, 5, 6, 7, 8}, {1}, {2}, {1, 1, 1, 1},
{206});
Simple<DeviceType::OPENCL>(
Simple<DeviceType::GPU>(
{1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 10},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100},
{2}, {2, 3}, {1, 1, 1, 2}, {387, 3853});
Simple<DeviceType::OPENCL>(
Simple<DeviceType::GPU>(
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 6},
{1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3,
4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, 4, 5, 6},
......@@ -119,7 +119,7 @@ TEST_F(FullyConnectedOpTest, SimpleOPENCL) {
}
TEST_F(FullyConnectedOpTest, SimpleGPUWithBatch) {
Simple<DeviceType::OPENCL>({2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 4},
Simple<DeviceType::GPU>({2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 4},
{1, 2, 3, 4}, {1}, {2}, {2, 1, 1, 1}, {32, 72});
}
......@@ -136,11 +136,11 @@ void Complex(const index_t batch,
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Weight", {out_channel, height * width * channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {out_channel});
net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel});
OpDefBuilder("FC", "FullyConnectedTest")
.Input("Input")
......@@ -159,11 +159,11 @@ void Complex(const index_t batch,
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(&net, "Weight", "WeightImage",
BufferToImage<DeviceType::GPU, T>(&net, "Weight", "WeightImage",
kernels::BufferType::WEIGHT_HEIGHT);
BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
BufferToImage<DeviceType::GPU, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest")
......@@ -176,9 +176,9 @@ void Complex(const index_t batch,
.Finalize(net.NewOperatorDef());
// Run on opencl
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
......@@ -225,11 +225,11 @@ void TestWXFormat(const index_t batch,
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channels});
net.AddRandomInput<DeviceType::OPENCL, float>(
net.AddRandomInput<DeviceType::GPU, float>(
"Weight", {out_channel, height * width * channels});
net.AddRandomInput<DeviceType::OPENCL, float>("Bias", {out_channel});
net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel});
OpDefBuilder("FC", "FullyConnectedTest")
.Input("Input")
......@@ -248,11 +248,11 @@ void TestWXFormat(const index_t batch,
expected.Copy(*net.GetOutput("Output"));
// Run on opencl
BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImage",
BufferToImage<DeviceType::GPU, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(&net, "Weight", "WeightImage",
BufferToImage<DeviceType::GPU, T>(&net, "Weight", "WeightImage",
kernels::BufferType::WEIGHT_WIDTH);
BufferToImage<DeviceType::OPENCL, T>(&net, "Bias", "BiasImage",
BufferToImage<DeviceType::GPU, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest")
......@@ -264,9 +264,9 @@ void TestWXFormat(const index_t batch,
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
net.RunOp(DeviceType::GPU);
ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
ImageToBuffer<DeviceType::GPU, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/fused_conv_2d.h"
namespace mace {
namespace ops {
void Register_FusedConv2D(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
FusedConv2dOp<DeviceType::CPU, float>);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, float>);
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
.Device(DeviceType::OPENCL)
.TypeConstraint<half>("T")
.Build(),
FusedConv2dOp<DeviceType::OPENCL, half>);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
} // namespace mace
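The FusedConv2D registration shown above still reads DeviceType::OPENCL. Under the enum rename applied elsewhere in this commit, the equivalent GPU-typed registration block would look like the sketch below; this block is illustrative only and is not taken from the displayed diff.

#ifdef MACE_ENABLE_OPENCL
  // Sketch only: the FusedConv2D GPU registrations written against the
  // renamed DeviceType::GPU value (not part of the displayed diff).
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
                                     .Device(DeviceType::GPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    FusedConv2dOp<DeviceType::GPU, float>);

  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D")
                                     .Device(DeviceType::GPU)
                                     .TypeConstraint<half>("T")
                                     .Build(),
                    FusedConv2dOp<DeviceType::GPU, half>);
#endif  // MACE_ENABLE_OPENCL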
(Diffs for the remaining changed files are collapsed in the viewer and not shown here.)