Commit e62a4230 authored by 刘琦

Merge branch 'refactor_smart_ptr' into 'master'

change new to make_unique

See merge request !1000
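The whole change follows one pattern: every `kernel_.reset(new T(...))` or `std::unique_ptr<T>(new T(...))` becomes a call to the `make_unique` helper introduced in the new header `mace/utils/memory.h` (shown at the end of this diff). A minimal before/after sketch, assuming a hypothetical `FooKernel` type that is not part of the MACE sources:

#include <memory>
#include "mace/utils/memory.h"  // provides mace::make_unique, usable before C++14

// FooKernel stands in for the various kernel classes touched by this commit.
struct FooKernel {
  FooKernel(float limit, float coeff) : limit(limit), coeff(coeff) {}
  float limit;
  float coeff;
};

int main() {
  std::unique_ptr<FooKernel> kernel_;
  // Before this commit: a raw new expression handed to the smart pointer.
  kernel_.reset(new FooKernel(6.0f, 0.1f));
  // After this commit: allocation and ownership expressed in one call.
  kernel_ = mace::make_unique<FooKernel>(6.0f, 0.1f);
  return 0;
}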
@@ -33,6 +33,8 @@
 #include "mace/core/runtime/hexagon/hexagon_device.h"
 #endif  // MACE_ENABLE_HEXAGON
+#include "mace/utils/memory.h"
 namespace mace {
 namespace {
@@ -289,7 +291,7 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
                        std::shared_ptr<float> data,
                        const DataFormat format) {
   MACE_CHECK_NOTNULL(data.get());
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = shape;
   impl_->data = data;
   impl_->format = format;
@@ -298,11 +300,11 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
 }
 MaceTensor::MaceTensor() {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
 }
 MaceTensor::MaceTensor(const MaceTensor &other) {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = other.shape();
   impl_->data = other.data();
   impl_->format = other.data_format();
@@ -310,7 +312,7 @@ MaceTensor::MaceTensor(const MaceTensor &other) {
 }
 MaceTensor::MaceTensor(const MaceTensor &&other) {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = other.shape();
   impl_->data = other.data();
   impl_->format = other.data_format();
@@ -725,7 +727,7 @@ MaceStatus MaceEngine::Impl::Run(
 }
 MaceEngine::MaceEngine(const MaceEngineConfig &config):
-    impl_(new MaceEngine::Impl(config)) {}
+    impl_(make_unique<MaceEngine::Impl>(config)) {}
 MaceEngine::~MaceEngine() = default;
...
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/activation.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -88,9 +89,8 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(
-          new opencl::image::ActivationKernel<T>(type, relux_max_limit,
-                                                 leakyrelu_coefficient));
+      kernel_ = make_unique<opencl::image::ActivationKernel<T>>(
+          type, relux_max_limit, leakyrelu_coefficient);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -24,6 +24,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/addn.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -107,7 +108,7 @@ class AddNOp<DeviceType::GPU, T> : public Operation {
   explicit AddNOp(OpConstructContext *context)
       : Operation(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::AddNKernel<T>);
+      kernel_ = make_unique<opencl::image::AddNKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include "mace/ops/arm/conv_winograd.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -607,7 +608,7 @@ void TransformFilter8x8(const float *filter,
 }
 }
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *transformed_filter,
                        const index_t batch,
                        const index_t in_height,
@@ -659,7 +660,7 @@ void WinoGradConv3x3s1(const float *input,
 }
 }
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *filter,
                        const index_t batch,
                        const index_t in_height,
@@ -684,28 +685,30 @@ void WinoGradConv3x3s1(const float *input,
   index_t transformed_output_size =
       in_tile_area * batch * out_channels * tile_count;
-  float *transformed_input = new float[transformed_input_size];  // TNCB
-  float *transformed_filter = new float[transformed_filter_size];  // TOC
-  float *transformed_output = new float[transformed_output_size];
+  auto transformed_input =
+      make_unique<float[]>(transformed_input_size);  // TNCB NOLINT
+  auto transformed_filter =
+      make_unique<float[]>(transformed_filter_size);  // TOC NOLINT
+  auto transformed_output =
+      make_unique<float[]>(transformed_output_size);  // NOLINT
   switch (out_tile_size) {
     case 2:
-      TransformFilter4x4(filter, in_channels, out_channels, transformed_filter);
+      TransformFilter4x4(filter, in_channels, out_channels,
+                         transformed_filter.get());
       break;
     case 6:
-      TransformFilter8x8(filter, in_channels, out_channels, transformed_filter);
+      TransformFilter8x8(filter, in_channels, out_channels,
+                         transformed_filter.get());
       break;
     default:
       MACE_NOT_IMPLEMENTED;
   }
-  WinoGradConv3x3s1(input, transformed_filter, batch, in_height, in_width,
-                    in_channels, out_channels, out_tile_size, transformed_input,
-                    transformed_output, output, sgemm, scratch_buffer);
-  delete[] transformed_input;
-  delete[] transformed_filter;
-  delete[] transformed_output;
+  WinogradConv3x3s1(input, transformed_filter.get(), batch, in_height,
+                    in_width, in_channels, out_channels, out_tile_size,
+                    transformed_input.get(), transformed_output.get(),
+                    output, sgemm, scratch_buffer);
 }
 void ConvRef3x3s1(const float *input,
...
@@ -35,7 +35,7 @@ void TransformFilter8x8(const float *filter,
                         const index_t out_channels,
                         float *output);
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *filter,
                        const index_t batch,
                        const index_t in_height,
@@ -47,7 +47,7 @@ void WinoGradConv3x3s1(const float *input,
                        SGemm *sgemm,
                        ScratchBuffer *scratch_buffer);
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *transformed_filter,
                        const index_t batch,
                        const index_t in_height,
...
@@ -66,7 +66,7 @@ TEST(ConvWinogradTest, winograd) {
                     in_channels, out_channels, output_data_ref);
   SGemm sgemm;
-  ops::WinoGradConv3x3s1(input_data, filter_data, batch, in_height,
+  ops::WinogradConv3x3s1(input_data, filter_data, batch, in_height,
                          in_width, in_channels, out_channels, 6,
                          output_data, &sgemm, nullptr);
...
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/batch_norm.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -156,8 +157,8 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::BatchNormKernel<T>(
-          epsilon, activation, relux_max_limit, leakyrelu_coefficient));
+      kernel_ = make_unique<opencl::image::BatchNormKernel<T>>(
+          epsilon, activation, relux_max_limit, leakyrelu_coefficient);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/batch_to_space.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -266,7 +267,7 @@ class BatchToSpaceNDOp<DeviceType::GPU, T> : public BatchToSpaceOpBase {
   explicit BatchToSpaceNDOp(OpConstructContext *context)
       : BatchToSpaceOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::BatchToSpaceKernel<T>);
+      kernel_ = make_unique<opencl::image::BatchToSpaceKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/bias_add.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -103,7 +104,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::BiasAddKernel<T>);
+      kernel_ = make_unique<opencl::image::BiasAddKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -18,6 +18,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/channel_shuffle.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -83,7 +84,7 @@ class ChannelShuffleOp<DeviceType::GPU, T> : public Operation {
       : Operation(context) {
     const int groups = Operation::GetOptionalArg<int>("group", 1);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ChannelShuffleKernel<T>(groups));
+      kernel_ = make_unique<opencl::image::ChannelShuffleKernel<T>>(groups);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -16,6 +16,7 @@
 #include "mace/core/operator.h"
 #include "mace/utils/quantize.h"
+#include "mace/utils/memory.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/concat.h"
@@ -199,7 +200,7 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
   explicit ConcatOp(OpConstructContext *context)
       : ConcatOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ConcatKernel<T>(axis_));
+      kernel_ = make_unique<opencl::image::ConcatKernel<T>>(axis_);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -31,6 +31,7 @@
 #include "mace/ops/arm/conv_winograd.h"
 #include "mace/ops/conv_pool_2d_base.h"
 #include "mace/ops/common/conv_pool_2d_util.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/utils.h"
 #ifdef MACE_ENABLE_NEON
@@ -129,7 +130,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
     if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1
         && dilation_h == 1 && dilation_w == 1) {
       if (conv2d_delegator_.get() == nullptr) {
-        conv2d_delegator_.reset(new arm::fp32::Conv2dK1x1());
+        conv2d_delegator_ = make_unique<arm::fp32::Conv2dK1x1>();
       }
       conv2d_delegator_->Compute(context, input, filter, output);
     } else {
@@ -354,7 +355,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
       *transformed_output_data = transformed_output.mutable_data<float>();
       conv_func = [=](const float *pad_input, float *pad_output) {
-        WinoGradConv3x3s1(pad_input,
+        WinogradConv3x3s1(pad_input,
                           transformed_filter_data,
                           batch,
                           extra_input_height,
@@ -508,12 +509,12 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
     }
 #else
     if (conv2d_delegator_.get() == nullptr) {
-      conv2d_delegator_.reset(new ref::Conv2d<float>(paddings[0],
-                                                     paddings[1],
-                                                     stride_h,
-                                                     stride_w,
-                                                     dilation_h,
-                                                     dilation_w));
+      conv2d_delegator_ = make_unique<ref::Conv2d<float>>(paddings[0],
+                                                          paddings[1],
+                                                          stride_h,
+                                                          stride_w,
+                                                          dilation_h,
+                                                          dilation_w);
     }
     conv2d_delegator_->Compute(context, input, filter, output);
 #endif
@@ -848,7 +849,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
       ScratchBuffer *scratch = context->device()->scratch_buffer();
       scratch->Rewind();
       scratch->GrowSize(im2col_size);
-      im2col.reset(new Tensor(scratch->Scratch(im2col_size), DT_UINT8));
+      im2col = make_unique<Tensor>(scratch->Scratch(im2col_size), DT_UINT8);
       uint8_t *im2col_data = im2col->mutable_data<uint8_t>();
       Im2col(input_data, input->shape(), filter_h, filter_w, stride_h,
              stride_w, static_cast<uint8_t>(input->zero_point()),
@@ -993,10 +994,10 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::Conv2dKernel<T>);
+      kernel_ = make_unique<opencl::image::Conv2dKernel<T>>();
     } else {
       mem_type = MemoryType::GPU_BUFFER;
-      kernel_.reset(new opencl::buffer::Conv2dKernel<T>);
+      kernel_ = make_unique<opencl::buffer::Conv2dKernel<T>>();
     }
     context->set_output_mem_type(mem_type);
     // Transform filter tensor to target format
...
@@ -18,6 +18,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/crop.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -114,8 +115,8 @@ class CropOp<DeviceType::GPU, T> : public Operation {
       : Operation(context) {
     const int axis = Operation::GetOptionalArg<int>("axis", 2);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::CropKernel<T>(
-          axis, Operation::GetRepeatedArgs<int>("offset")));
+      kernel_ = make_unique<opencl::image::CropKernel<T>>(
+          axis, Operation::GetRepeatedArgs<int>("offset"));
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -28,6 +28,7 @@
 #include "mace/core/tensor.h"
 #include "mace/ops/activation.h"
 #include "mace/ops/arm/deconv_2d_neon.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/utils.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/buffer_transformer.h"
@@ -362,7 +363,7 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
       : Deconv2dOpBase(context) {
     MemoryType mem_type = MemoryType::GPU_IMAGE;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::Deconv2dKernel<T>);
+      kernel_ = make_unique<opencl::image::Deconv2dKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/depth_to_space.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -97,7 +98,7 @@ class DepthToSpaceOp<DeviceType::GPU, T> : public Operation {
       : Operation(context) {
     int block_size = Operation::GetOptionalArg<int>("block_size", 1);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::DepthToSpaceKernel<T>(block_size));
+      kernel_ = make_unique<opencl::image::DepthToSpaceKernel<T>>(block_size);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -33,6 +33,7 @@
 #include "mace/ops/arm/depthwise_conv2d_neon.h"
 #include "mace/ops/conv_pool_2d_base.h"
 #include "mace/public/mace.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/quantize.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/buffer_transformer.h"
@@ -493,10 +494,10 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::DepthwiseConv2dKernel<T>);
+      kernel_ = make_unique<opencl::image::DepthwiseConv2dKernel<T>>();
     } else {
       mem_type = MemoryType::GPU_BUFFER;
-      kernel_.reset(new opencl::buffer::DepthwiseConv2dKernel<T>);
+      kernel_ = make_unique<opencl::buffer::DepthwiseConv2dKernel<T>>();
     }
     context->set_output_mem_type(mem_type);
     // Transform filter tensor to target format
...
@@ -28,6 +28,7 @@
 #include "mace/ops/arm/depthwise_deconv2d_neon.h"
 #include "mace/utils/utils.h"
 #include "mace/public/mace.h"
+#include "mace/utils/memory.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/depthwise_deconv2d.h"
@@ -412,7 +413,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
      : Deconv2dOpBase(context) {
     MemoryType mem_type = MemoryType::GPU_IMAGE;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::DepthwiseDeconv2dKernel<T>);
+      kernel_ = make_unique<opencl::image::DepthwiseDeconv2dKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -30,6 +30,7 @@
 #include "mace/core/future.h"
 #include "mace/core/operator.h"
 #include "mace/core/tensor.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/quantize.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/buffer_transformer.h"
@@ -1160,8 +1161,8 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::EltwiseKernel<T>(
-          type, coeff, scalar_input, scalar_input_index));
+      kernel_ = make_unique<opencl::image::EltwiseKernel<T>>(
+          type, coeff, scalar_input, scalar_input_index);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -38,6 +38,8 @@
 #include "mace/ops/opencl/image/fully_connected.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -186,7 +188,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(new opencl::image::FullyConnectedKernel<T>);
+      kernel_ = make_unique<opencl::image::FullyConnectedKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -18,6 +18,7 @@
 #include "mace/core/operator.h"
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/lstm_cell.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -36,7 +37,7 @@ class LSTMCellOp<DeviceType::GPU, T> : public Operation {
             0.0));
     MemoryType mem_type = MemoryType::GPU_IMAGE;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::LSTMCellKernel<T>(forget_bias));
+      kernel_ = make_unique<opencl::image::LSTMCellKernel<T>>(forget_bias);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer/utils.h"
 #include "mace/ops/opencl/helper.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -211,8 +212,8 @@ MaceStatus Conv2dKernel<T>::Compute(
       old_scratch_size_ = scratch->size();
     }
-    padded_input.reset(new Tensor(scratch->Scratch(padded_input_size),
-                                  input->dtype()));
+    padded_input = make_unique<Tensor>(scratch->Scratch(padded_input_size),
+                                       input->dtype());
     padded_input->Resize(padded_input_shape);
     PadInput(context, &kernels_[0], input, pad_top, pad_left,
...
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer/utils.h"
 #include "mace/ops/opencl/helper.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -165,8 +166,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
       old_scratch_size_ = scratch->size();
     }
-    padded_input.reset(new Tensor(scratch->Scratch(padded_input_size),
-                                  input->dtype()));
+    padded_input = make_unique<Tensor>(scratch->Scratch(padded_input_size),
+                                       input->dtype());
     padded_input->Resize(padded_input_shape);
     PadInput(context, &kernels_[0], input, pad_top, pad_left,
...
@@ -24,6 +24,7 @@
 #include "mace/ops/opencl/buffer/utils.h"
 #include "mace/ops/opencl/helper.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -124,8 +125,8 @@ MaceStatus PoolingKernel<T>::Compute(
       old_scratch_size_ = scratch->size();
     }
-    padded_input.reset(new Tensor(scratch->Scratch(padded_input_size),
-                                  input->dtype()));
+    padded_input = make_unique<Tensor>(scratch->Scratch(padded_input_size),
+                                       input->dtype());
     padded_input->Resize(padded_input_shape);
     PadInput(context, &kernels_[0], input, 0, 0,
...
@@ -24,6 +24,7 @@
 #include "mace/ops/opencl/image/image_to_buffer.h"
 #include "mace/ops/opencl/buffer/buffer_transform.h"
 #include "mace/ops/common/transpose.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -34,11 +35,11 @@ class OpenCLBufferTransformer {
   OpenCLBufferTransformer(const MemoryType in_mem_type,
                           const MemoryType out_mem_type) {
     if (out_mem_type == MemoryType::GPU_IMAGE) {
-      kernel_.reset(new opencl::image::BufferToImage<T>);
+      kernel_ = make_unique<opencl::image::BufferToImage<T>>();
     } else if (in_mem_type == MemoryType::GPU_IMAGE) {
-      kernel_.reset(new opencl::image::ImageToBuffer<T>);
+      kernel_ = make_unique<opencl::image::ImageToBuffer<T>>();
     } else {
-      kernel_.reset(new opencl::buffer::BufferTransform<T>);
+      kernel_ = make_unique<opencl::buffer::BufferTransform<T>>();
     }
   }
...
@@ -26,6 +26,7 @@
 #include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/core/runtime/opencl/opencl_util.h"
 #include "mace/core/types.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/utils.h"
 namespace mace {
@@ -41,8 +42,8 @@ namespace ops {
 #define MACE_OUT_OF_RANGE_INIT(kernel)                       \
   if (runtime->IsOutOfRangeCheckEnabled()) {                 \
-    oorc_flag = std::move(std::unique_ptr<Buffer>(           \
-        new Buffer((context)->device()->allocator())));      \
+    oorc_flag = make_unique<Buffer>(                         \
+        (context)->device()->allocator());                   \
     MACE_RETURN_IF_ERROR((oorc_flag)->Allocate(sizeof(int)));\
     oorc_flag->Map(nullptr);                                 \
     *(oorc_flag->mutable_data<int>()) = 0;                   \
...
@@ -17,6 +17,7 @@
 #include "mace/ops/common/activation_type.h"
 #include "mace/ops/common/conv_pool_2d_util.h"
 #include "mace/ops/opencl/helper.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/utils.h"
 namespace mace {
@@ -264,9 +265,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
                         OpenCLBufferType::IN_OUT_HEIGHT,
                         &t_input_image_shape);
   ScratchImage transformed_input_image(scratch_manager);
-  std::unique_ptr<Tensor> transformed_input(new Tensor(
+  std::unique_ptr<Tensor> transformed_input = make_unique<Tensor>(
       transformed_input_image.Scratch(context->device()->allocator(),
-                                      t_input_image_shape, dt), dt));
+                                      t_input_image_shape, dt), dt);
   MACE_RETURN_IF_ERROR(transformed_input->ResizeImage(t_input_shape,
                                                       t_input_image_shape));
   MACE_RETURN_IF_ERROR(WinogradInputTransform(
@@ -289,9 +290,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
                         &mm_output_image_shape);
   ScratchImage mm_output_image(scratch_manager);
-  std::unique_ptr<Tensor> mm_output(new Tensor(
+  std::unique_ptr<Tensor> mm_output = make_unique<Tensor>(
       mm_output_image.Scratch(context->device()->allocator(),
-                              mm_output_image_shape, dt), dt));
+                              mm_output_image_shape, dt), dt);
   MACE_RETURN_IF_ERROR(mm_output->ResizeImage(mm_output_shape,
                                               mm_output_image_shape));
...
@@ -22,6 +22,7 @@
 #include "mace/core/tensor.h"
 #include "mace/core/workspace.h"
 #include "mace/ops/opencl/helper.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -130,7 +131,8 @@ TEST(OutOfRangeCheckTest, RandomTest) {
   index_t channels = 11;
   GPUContext gpu_context;
-  std::unique_ptr<Device> device(new GPUDevice(gpu_context.opencl_tuner()));
+  std::unique_ptr<Device> device = make_unique<GPUDevice>(
+      gpu_context.opencl_tuner());
   Workspace ws;
   OpContext context(&ws, device.get());
...
@@ -14,6 +14,7 @@
 #include "mace/ops/ops_test_util.h"
 #include "mace/core/memory_optimizer.h"
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -120,17 +121,15 @@ OpTestContext *OpTestContext::Get(int num_threads,
 OpTestContext::OpTestContext(int num_threads,
                              CPUAffinityPolicy cpu_affinity_policy,
                              bool use_gemmlowp)
-    : gpu_context_(new GPUContext(GetStoragePathFromEnv())),
+    : gpu_context_(std::make_shared<GPUContext>(GetStoragePathFromEnv())),
       opencl_mem_types_({MemoryType::GPU_IMAGE}) {
-  device_map_[DeviceType::CPU] = std::unique_ptr<Device>(
-      new CPUDevice(num_threads,
-                    cpu_affinity_policy,
-                    use_gemmlowp));
-  device_map_[DeviceType::GPU] = std::unique_ptr<Device>(
-      new GPUDevice(gpu_context_->opencl_tuner(),
-                    gpu_context_->opencl_cache_storage(),
-                    GPUPriorityHint::PRIORITY_NORMAL));
+  device_map_[DeviceType::CPU] = make_unique<CPUDevice>(
+      num_threads, cpu_affinity_policy, use_gemmlowp);
+  device_map_[DeviceType::GPU] = make_unique<GPUDevice>(
+      gpu_context_->opencl_tuner(),
+      gpu_context_->opencl_cache_storage(),
+      GPUPriorityHint::PRIORITY_NORMAL);
 }
 std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
@@ -189,12 +188,12 @@ bool OpsTestNet::Setup(mace::DeviceType device) {
     }
   }
   MemoryOptimizer mem_optimizer;
-  net_ = std::unique_ptr<NetBase>(new SerialNet(
+  net_ = make_unique<SerialNet>(
       op_registry_.get(),
       &net_def,
       &ws_,
       OpTestContext::Get()->GetDevice(device),
-      &mem_optimizer));
+      &mem_optimizer);
   MaceStatus status = (ws_.PreallocateOutputTensor(
       net_def,
       &mem_optimizer,
@@ -236,12 +235,12 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def,
                               const mace::DeviceType device) {
   device_type_ = device;
   MemoryOptimizer mem_optimizer;
-  net_ = std::unique_ptr<NetBase>(new SerialNet(
+  net_ = make_unique<SerialNet>(
      op_registry_.get(),
      &net_def,
      &ws_,
      OpTestContext::Get()->GetDevice(device),
-      &mem_optimizer));
+      &mem_optimizer);
   MACE_RETURN_IF_ERROR(ws_.PreallocateOutputTensor(
      net_def,
      &mem_optimizer,
...
@@ -34,6 +34,7 @@
 #include "mace/core/workspace.h"
 #include "mace/ops/ops_registry.h"
 #include "mace/public/mace.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/utils.h"
 #include "mace/utils/quantize.h"
 #include "mace/ops/testing/test_utils.h"
@@ -97,7 +98,7 @@ class OpTestContext {
 class OpsTestNet {
  public:
   OpsTestNet() :
-      op_registry_(new OpRegistry()) {}
+      op_registry_(make_unique<OpRegistry>()) {}
   template <DeviceType D, typename T>
   void AddInputFromArray(const std::string &name,
@@ -355,9 +356,9 @@ class OpsTestNet {
   std::unique_ptr<Tensor> CreateTensor(
       const std::vector<index_t> &shape = {},
       const std::vector<T> &data = {}) {
-    std::unique_ptr<Tensor> res(
-        new Tensor(OpTestContext::Get()->GetDevice(D)->allocator(),
-                   DataTypeToEnum<T>::v()));
+    std::unique_ptr<Tensor> res = make_unique<Tensor>(
+        OpTestContext::Get()->GetDevice(D)->allocator(),
+        DataTypeToEnum<T>::v());
     if (!data.empty()) {
       res->Resize(shape);
       T *input_data = res->mutable_data<T>();
...
@@ -20,6 +20,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/pad.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -182,8 +183,8 @@ class PadOp<DeviceType::GPU, T> : public Operation {
     float constant_value = Operation::GetOptionalArg<float>(
         "constant_value", 0.0);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::PadKernel<T>(
-          type, paddings, constant_value));
+      kernel_ = make_unique<opencl::image::PadKernel<T>>(
+          type, paddings, constant_value);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -32,6 +32,7 @@
 #include "mace/ops/opencl/image/pooling.h"
 #include "mace/ops/opencl/buffer/pooling.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -433,10 +434,10 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase {
   explicit PoolingOp(OpConstructContext *context)
       : PoolingOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::PoolingKernel<T>);
+      kernel_ = make_unique<opencl::image::PoolingKernel<T>>();
     } else {
       context->set_output_mem_type(MemoryType::GPU_BUFFER);
-      kernel_.reset(new opencl::buffer::PoolingKernel<T>);
+      kernel_ = make_unique<opencl::buffer::PoolingKernel<T>>();
     }
   }
   MaceStatus Run(OpContext *context) override {
...
@@ -25,6 +25,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/reduce.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -847,9 +848,9 @@ class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
   explicit ReduceOp(OpConstructContext *context)
       : ReduceOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ReduceKernel<T>(reduce_type_,
-                                                       axis_,
-                                                       keep_dims_));
+      kernel_ = make_unique<opencl::image::ReduceKernel<T>>(reduce_type_,
+                                                            axis_,
+                                                            keep_dims_);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -23,6 +23,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/resize_bicubic.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -197,9 +198,8 @@ class ResizeBicubicOp<DeviceType::GPU, T> : public Operation {
         "size", {-1, -1});
     MACE_CHECK(size.size() == 2);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ResizeBicubicKernel<T>(align_corners,
-                                                              size[0],
-                                                              size[1]));
+      kernel_ = make_unique<opencl::image::ResizeBicubicKernel<T>>(
+          align_corners, size[0], size[1]);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #include <vector>
 #include "mace/core/operator.h"
+#include "mace/utils/memory.h"
 #include "mace/utils/quantize.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/resize_bilinear.h"
@@ -332,9 +333,8 @@ class ResizeBilinearOp<DeviceType::GPU, T> : public Operation {
         "size", {-1, -1});
     MACE_CHECK(size.size() == 2);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ResizeBilinearKernel<T>(align_corners,
-                                                               size[0],
-                                                               size[1]));
+      kernel_ = make_unique<opencl::image::ResizeBilinearKernel<T>>(
+          align_corners, size[0], size[1]);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -22,6 +22,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/resize_nearest_neighbor.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -142,8 +143,8 @@ class ResizeNearestNeighborOp<DeviceType::GPU, T> : public Operation {
     bool align_corners = Operation::GetOptionalArg<bool>(
         "align_corners", false);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::ResizeNearestNeighborKernel<T>(
-          align_corners));
+      kernel_ = make_unique<opencl::image::ResizeNearestNeighborKernel<T>>(
+          align_corners);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -18,6 +18,7 @@
 #include "mace/ops/sgemm.h"
 #include "mace/core/runtime/cpu/cpu_runtime.h"
+#include "mace/utils/memory.h"
 #if defined(MACE_ENABLE_NEON)
 #include <arm_neon.h>
@@ -55,27 +56,27 @@ void SGemm::operator()(const SGemmMatrixMap<const float> &lhs,
     scratch_buffer->GrowSize(total_size * sizeof(float));
     if (!lhs.is_const()) {
-      packed_lhs_.reset(new Tensor(scratch_buffer->Scratch(
-          lhs.size() * sizeof(float)), DT_FLOAT));
+      packed_lhs_ = make_unique<Tensor>(scratch_buffer->Scratch(
+          lhs.size() * sizeof(float)), DT_FLOAT);
     }
     if (!rhs.is_const()) {
-      packed_rhs_.reset(new Tensor(scratch_buffer->Scratch(
-          rhs.size() * sizeof(float)), DT_FLOAT));
+      packed_rhs_ = make_unique<Tensor>(scratch_buffer->Scratch(
+          rhs.size() * sizeof(float)), DT_FLOAT);
     }
-    packed_result_.reset(new Tensor(scratch_buffer->Scratch(
-        result->size() * sizeof(float)), DT_FLOAT));
+    packed_result_ = make_unique<Tensor>(scratch_buffer->Scratch(
+        result->size() * sizeof(float)), DT_FLOAT);
   }
   if (packed_lhs_.get() == nullptr) {
-    packed_lhs_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT));
+    packed_lhs_ = make_unique<Tensor>(GetCPUAllocator(), DT_FLOAT);
     packed_lhs_->Resize({lhs.size()});
   }
   if (packed_rhs_.get() == nullptr) {
-    packed_rhs_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT));
+    packed_rhs_ = make_unique<Tensor>(GetCPUAllocator(), DT_FLOAT);
     packed_rhs_->Resize({rhs.size()});
   }
   if (packed_result_.get() == nullptr) {
-    packed_result_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT));
+    packed_result_ = make_unique<Tensor>(GetCPUAllocator(), DT_FLOAT);
     packed_result_->Resize({result->size()});
   }
...
@@ -30,6 +30,8 @@
 #include "mace/ops/opencl/buffer/softmax.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -374,10 +376,10 @@ class SoftmaxOp<DeviceType::GPU, T> : public Operation {
   explicit SoftmaxOp(OpConstructContext *context)
       : Operation(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::SoftmaxKernel<T>);
+      kernel_ = make_unique<opencl::image::SoftmaxKernel<T>>();
     } else {
       context->set_output_mem_type(MemoryType::GPU_BUFFER);
-      kernel_.reset(new opencl::buffer::SoftmaxKernel<T>);
+      kernel_ = make_unique<opencl::buffer::SoftmaxKernel<T>>();
     }
   }
   MaceStatus Run(OpContext *context) override {
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/space_to_batch.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -309,7 +310,7 @@ class SpaceToBatchNDOp<DeviceType::GPU, T> : public SpaceToBatchOpBase {
   explicit SpaceToBatchNDOp(OpConstructContext *context)
       : SpaceToBatchOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::SpaceToBatchKernel<T>);
+      kernel_ = make_unique<opencl::image::SpaceToBatchKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/space_to_depth.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -95,7 +96,7 @@ class SpaceToDepthOp<DeviceType::GPU, T> : public Operation {
       : Operation(context) {
     int block_size = Operation::GetOptionalArg<int>("block_size", 1);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::SpaceToDepthKernel<T>(block_size));
+      kernel_ = make_unique<opencl::image::SpaceToDepthKernel<T>>(block_size);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/split.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -108,7 +109,7 @@ class SplitOp<DeviceType::GPU, T> : public Operation {
       : Operation(context) {
     int32_t axis = Operation::GetOptionalArg<int>("axis", 3);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::SplitKernel<T>(axis));
+      kernel_ = make_unique<opencl::image::SplitKernel<T>>(axis);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
@@ -19,6 +19,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/sqrdiff_mean.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 namespace mace {
 namespace ops {
@@ -83,7 +84,7 @@ class SqrDiffMeanOp<DeviceType::GPU, T> : public Operation {
   explicit SqrDiffMeanOp(OpConstructContext *context)
       : Operation(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::SqrDiffMeanKernel<T>());
+      kernel_ = make_unique<opencl::image::SqrDiffMeanKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
...
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_UTILS_MEMORY_H_
#define MACE_UTILS_MEMORY_H_
#include <memory>
#include <utility>
namespace mace {
namespace memory_internal {
// Traits to select proper overload and return type for `make_unique<>`.
template <typename T>
struct MakeUniqueResult {
using scalar = std::unique_ptr<T>;
};
template <typename T>
struct MakeUniqueResult<T[]> {
using array = std::unique_ptr<T[]>;
};
template <typename T, size_t N>
struct MakeUniqueResult<T[N]> {
using invalid = void;
};
} // namespace memory_internal
// gcc 4.8 has __cplusplus at 201301 but doesn't define make_unique. Other
// supported compilers either just define __cplusplus as 201103 but have
// make_unique (msvc), or have make_unique whenever __cplusplus > 201103 (clang)
#if (__cplusplus > 201103L || defined(_MSC_VER)) && \
!(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
using std::make_unique;
#else
// `make_unique` overload for non-array types.
template <typename T, typename... Args>
typename memory_internal::MakeUniqueResult<T>::scalar make_unique(
Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
// `make_unique` overload for an array T[] of unknown bounds.
// The array allocation needs to use the `new T[size]` form and cannot take
// element constructor arguments. The `std::unique_ptr` will manage destructing
// these array elements.
template <typename T>
typename memory_internal::MakeUniqueResult<T>::array make_unique(size_t n) {
return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
}
// `make_unique` overload for an array T[N] of known bounds.
// This construction will be rejected.
template <typename T, typename... Args>
typename memory_internal::MakeUniqueResult<T>::invalid make_unique(
Args&&... /* args */) = delete;
#endif
} // namespace mace
#endif // MACE_UTILS_MEMORY_H_
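A short usage sketch of the header above; `Widget` and the buffer length are illustrative only and not part of the MACE sources:

#include <iostream>
#include "mace/utils/memory.h"

namespace {
struct Widget {
  explicit Widget(int v) : value(v) {}
  int value;
};
}  // namespace

int main() {
  // Scalar overload: forwards the constructor argument.
  auto w = mace::make_unique<Widget>(42);
  // Array overload of unknown bound: value-initializes 8 floats to zero.
  auto buf = mace::make_unique<float[]>(8);
  buf[0] = static_cast<float>(w->value);
  std::cout << buf[0] << std::endl;  // prints 42
  return 0;
}

The `T[N]` overload with a known bound is deleted, so a call such as `make_unique<float[8]>()` is rejected at compile time, matching the behaviour of `std::make_unique`.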