diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc
index 8f5511541ad5d57f48e62c51e2805cb059fb5b8d..63dcd7adc6ef326df1da9b95519aba036cf299a5 100644
--- a/mace/libmace/mace.cc
+++ b/mace/libmace/mace.cc
@@ -33,6 +33,8 @@
 #include "mace/core/runtime/hexagon/hexagon_device.h"
 #endif  // MACE_ENABLE_HEXAGON
 
+#include "mace/utils/memory.h"
+
 namespace mace {
 namespace {
@@ -289,7 +291,7 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
                        std::shared_ptr<float> data,
                        const DataFormat format) {
   MACE_CHECK_NOTNULL(data.get());
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = shape;
   impl_->data = data;
   impl_->format = format;
@@ -298,11 +300,11 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
 }
 
 MaceTensor::MaceTensor() {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
 }
 
 MaceTensor::MaceTensor(const MaceTensor &other) {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = other.shape();
   impl_->data = other.data();
   impl_->format = other.data_format();
@@ -310,7 +312,7 @@ MaceTensor::MaceTensor(const MaceTensor &other) {
 }
 
 MaceTensor::MaceTensor(const MaceTensor &&other) {
-  impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
+  impl_ = make_unique<MaceTensor::Impl>();
   impl_->shape = other.shape();
   impl_->data = other.data();
   impl_->format = other.data_format();
@@ -725,7 +727,7 @@ MaceStatus MaceEngine::Impl::Run(
 }
 
 MaceEngine::MaceEngine(const MaceEngineConfig &config):
-    impl_(new MaceEngine::Impl(config)) {}
+    impl_(make_unique<MaceEngine::Impl>(config)) {}
 
 MaceEngine::~MaceEngine() = default;
diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc
index a9e28f1e2b08d985f657d3fa10a9a431a542c9e1..29fee227df0ebac83d9a2e8c9a275a62aff8c68a 100644
--- a/mace/ops/activation.cc
+++ b/mace/ops/activation.cc
@@ -22,6 +22,7 @@
 #include "mace/ops/opencl/buffer_transformer.h"
 #include "mace/ops/opencl/image/activation.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 
 namespace mace {
 namespace ops {
@@ -88,9 +89,8 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
-      kernel_.reset(
-          new opencl::image::ActivationKernel<T>(type, relux_max_limit,
-                                                 leakyrelu_coefficient));
+      kernel_ = make_unique<opencl::image::ActivationKernel<T>>(
+          type, relux_max_limit, leakyrelu_coefficient);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc
index cc11a0efc55fe9568c3635c5a72b54f81b60b1ac..5e387d87684d833eb40c5ebe30e564ef74bb55cd 100644
--- a/mace/ops/addn.cc
+++ b/mace/ops/addn.cc
@@ -24,6 +24,7 @@
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/addn.h"
 #endif  // MACE_ENABLE_OPENCL
+#include "mace/utils/memory.h"
 
 namespace mace {
 namespace ops {
@@ -107,7 +108,7 @@ class AddNOp<DeviceType::GPU, T> : public Operation {
   explicit AddNOp(OpConstructContext *context)
       : Operation(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::AddNKernel<T>);
+      kernel_ = make_unique<opencl::image::AddNKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
diff --git a/mace/ops/arm/conv_winograd.cc b/mace/ops/arm/conv_winograd.cc
index 11d4fbf0d52eac3d8c7abab87a5f5b95693c5df5..8922292d5a5bfb559bddcba59aa31c08059ee09c 100644
--- a/mace/ops/arm/conv_winograd.cc
+++ b/mace/ops/arm/conv_winograd.cc
@@ -15,6 +15,7 @@
 #include
 
 #include "mace/ops/arm/conv_winograd.h"
+#include "mace/utils/memory.h"
 
 namespace mace {
 namespace ops {
@@ -607,7 +608,7 @@ void TransformFilter8x8(const float *filter,
   }
 }
 
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *transformed_filter,
                        const index_t batch,
                        const index_t in_height,
@@ -659,7 +660,7 @@ void WinoGradConv3x3s1(const float *input,
   }
 }
 
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *filter,
                        const index_t batch,
                        const index_t in_height,
@@ -684,28 +685,30 @@ void WinoGradConv3x3s1(const float *input,
   index_t transformed_output_size =
       in_tile_area * batch * out_channels * tile_count;
 
-  float *transformed_input = new float[transformed_input_size];  // TNCB
-  float *transformed_filter = new float[transformed_filter_size];  // TOC
-  float *transformed_output = new float[transformed_output_size];
+  auto transformed_input =
+      make_unique<float[]>(transformed_input_size);  // TNCB  NOLINT
+  auto transformed_filter =
+      make_unique<float[]>(transformed_filter_size);  // TOC  NOLINT
+  auto transformed_output =
+      make_unique<float[]>(transformed_output_size);  // NOLINT
 
   switch (out_tile_size) {
     case 2:
-      TransformFilter4x4(filter, in_channels, out_channels, transformed_filter);
+      TransformFilter4x4(filter, in_channels, out_channels,
+                         transformed_filter.get());
       break;
     case 6:
-      TransformFilter8x8(filter, in_channels, out_channels, transformed_filter);
+      TransformFilter8x8(filter, in_channels, out_channels,
+                         transformed_filter.get());
       break;
     default:
      MACE_NOT_IMPLEMENTED;
  }
 
-  WinoGradConv3x3s1(input, transformed_filter, batch, in_height, in_width,
-                    in_channels, out_channels, out_tile_size, transformed_input,
-                    transformed_output, output, sgemm, scratch_buffer);
-
-  delete[] transformed_input;
-  delete[] transformed_filter;
-  delete[] transformed_output;
+  WinogradConv3x3s1(input, transformed_filter.get(), batch, in_height,
+                    in_width, in_channels, out_channels, out_tile_size,
+                    transformed_input.get(), transformed_output.get(),
+                    output, sgemm, scratch_buffer);
 }
 
 void ConvRef3x3s1(const float *input,
diff --git a/mace/ops/arm/conv_winograd.h b/mace/ops/arm/conv_winograd.h
index 396d1870b96a4565e56ea5d48faf3e46d616a4da..38242ee90650e63f38ca8dbb7c015cd25a1dd874 100644
--- a/mace/ops/arm/conv_winograd.h
+++ b/mace/ops/arm/conv_winograd.h
@@ -35,7 +35,7 @@ void TransformFilter8x8(const float *filter,
                         const index_t out_channels,
                         float *output);
 
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *filter,
                        const index_t batch,
                        const index_t in_height,
@@ -47,7 +47,7 @@ void WinoGradConv3x3s1(const float *input,
                        SGemm *sgemm,
                        ScratchBuffer *scratch_buffer);
 
-void WinoGradConv3x3s1(const float *input,
+void WinogradConv3x3s1(const float *input,
                        const float *transformed_filter,
                        const index_t batch,
                        const index_t in_height,
diff --git a/mace/ops/arm/conv_winograd_test.cc b/mace/ops/arm/conv_winograd_test.cc
index 4f28472d5199dcb2f72667e30da10db82c0ba7d2..4dc618f77c866d8c28ae5ff36770c38d2ae49c46 100644
--- a/mace/ops/arm/conv_winograd_test.cc
+++ b/mace/ops/arm/conv_winograd_test.cc
@@ -66,7 +66,7 @@ TEST(ConvWinogradTest, winograd) {
                     in_channels, out_channels, output_data_ref);
 
   SGemm sgemm;
-  ops::WinoGradConv3x3s1(input_data, filter_data, batch, in_height,
+  ops::WinogradConv3x3s1(input_data, filter_data, batch, in_height,
                          in_width, in_channels, out_channels, 6,
                          output_data, &sgemm, nullptr);
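// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch) of the pattern used in the
// mace/ops/arm/conv_winograd.cc hunk above: the array form of make_unique
// replaces a manual new[]/delete[] pair, so the scratch buffers are released
// on every return path. The function, buffer name and consumer below are made
// up for illustration; std::make_unique (C++14) is used here, which
// mace::make_unique mirrors.
// ----------------------------------------------------------------------------
#include <cstddef>
#include <memory>

// Hypothetical consumer that still takes a raw pointer.
void Consume(const float *data, std::size_t n);

void ArrayFormExample(std::size_t n) {
  // Allocates and value-initializes n floats; they are freed automatically
  // when `buffer` goes out of scope, even on early returns or exceptions.
  std::unique_ptr<float[]> buffer = std::make_unique<float[]>(n);
  buffer[0] = 1.0f;          // operator[] is provided by the T[] form
  Consume(buffer.get(), n);  // raw-pointer APIs keep working via .get()
}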
"mace/ops/opencl/image/batch_norm.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -156,8 +157,8 @@ class BatchNormOp : public Operation { MemoryType mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::BatchNormKernel( - epsilon, activation, relux_max_limit, leakyrelu_coefficient)); + kernel_ = make_unique>( + epsilon, activation, relux_max_limit, leakyrelu_coefficient); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 8d1e463c56b3510901d42d5d4370273d252ecbf2..cfd350d458429ea86a68e9176c41108e2469f392 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/batch_to_space.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -266,7 +267,7 @@ class BatchToSpaceNDOp : public BatchToSpaceOpBase { explicit BatchToSpaceNDOp(OpConstructContext *context) : BatchToSpaceOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::BatchToSpaceKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 3552a0a31289cbb070bd761644d5711530ea3b80..ca67c4fb14d825fde6c8c831733bc27eb01fb8f7 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -22,6 +22,7 @@ #include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/image/bias_add.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -103,7 +104,7 @@ class BiasAddOp : public Operation { MemoryType mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::BiasAddKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 57607755cc034f364d07660924d6481e3d79793b..70e1811a07292af8eb0982caf46decb393f28325 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -18,6 +18,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/channel_shuffle.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -83,7 +84,7 @@ class ChannelShuffleOp : public Operation { : Operation(context) { const int groups = Operation::GetOptionalArg("group", 1); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ChannelShuffleKernel(groups)); + kernel_ = make_unique>(groups); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 47e95a37190cbf2eb6aed08af544220ad9ce8643..b0785ac0d1304808ea91c0875679bbcef8e280ad 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -16,6 +16,7 @@ #include "mace/core/operator.h" #include "mace/utils/quantize.h" +#include "mace/utils/memory.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/concat.h" @@ -199,7 +200,7 @@ class ConcatOp : public ConcatOpBase { explicit ConcatOp(OpConstructContext *context) : ConcatOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ConcatKernel(axis_)); + kernel_ = make_unique>(axis_); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index 
653e3e33f535915eb52baad01e4e14f3b7a80bb7..01dd092b1138a4c9af5d587e023f4c965d8c0c6b 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -31,6 +31,7 @@ #include "mace/ops/arm/conv_winograd.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/ops/common/conv_pool_2d_util.h" +#include "mace/utils/memory.h" #include "mace/utils/utils.h" #ifdef MACE_ENABLE_NEON @@ -129,7 +130,7 @@ class Conv2dOp : public ConvPool2dOpBase { if (filter_h == 1 && filter_w == 1 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_.reset(new arm::fp32::Conv2dK1x1()); + conv2d_delegator_ = make_unique(); } conv2d_delegator_->Compute(context, input, filter, output); } else { @@ -354,7 +355,7 @@ class Conv2dOp : public ConvPool2dOpBase { *transformed_output_data = transformed_output.mutable_data(); conv_func = [=](const float *pad_input, float *pad_output) { - WinoGradConv3x3s1(pad_input, + WinogradConv3x3s1(pad_input, transformed_filter_data, batch, extra_input_height, @@ -508,12 +509,12 @@ class Conv2dOp : public ConvPool2dOpBase { } #else if (conv2d_delegator_.get() == nullptr) { - conv2d_delegator_.reset(new ref::Conv2d(paddings[0], - paddings[1], - stride_h, - stride_w, - dilation_h, - dilation_w)); + conv2d_delegator_ = make_unique>(paddings[0], + paddings[1], + stride_h, + stride_w, + dilation_h, + dilation_w); } conv2d_delegator_->Compute(context, input, filter, output); #endif @@ -848,7 +849,7 @@ class Conv2dOp : public ConvPool2dOpBase { ScratchBuffer *scratch = context->device()->scratch_buffer(); scratch->Rewind(); scratch->GrowSize(im2col_size); - im2col.reset(new Tensor(scratch->Scratch(im2col_size), DT_UINT8)); + im2col = make_unique(scratch->Scratch(im2col_size), DT_UINT8); uint8_t *im2col_data = im2col->mutable_data(); Im2col(input_data, input->shape(), filter_h, filter_w, stride_h, stride_w, static_cast(input->zero_point()), @@ -993,10 +994,10 @@ class Conv2dOp : public ConvPool2dOpBase { MemoryType mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::Conv2dKernel); + kernel_ = make_unique>(); } else { mem_type = MemoryType::GPU_BUFFER; - kernel_.reset(new opencl::buffer::Conv2dKernel); + kernel_ = make_unique>(); } context->set_output_mem_type(mem_type); // Transform filter tensor to target format diff --git a/mace/ops/crop.cc b/mace/ops/crop.cc index 0d41845795ecf6b50a9016c99e4e84e0c05d120c..a9af8b8db88c05a6735339e868c4080a4ca1c389 100644 --- a/mace/ops/crop.cc +++ b/mace/ops/crop.cc @@ -18,6 +18,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/crop.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -114,8 +115,8 @@ class CropOp : public Operation { : Operation(context) { const int axis = Operation::GetOptionalArg("axis", 2); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::CropKernel( - axis, Operation::GetRepeatedArgs("offset"))); + kernel_ = make_unique>( + axis, Operation::GetRepeatedArgs("offset")); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 22fa5c5bb6f95c637e4d9b96652293302697c769..3585659f5229e3f315e0a415d78dade8915beea2 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -28,6 +28,7 @@ #include "mace/core/tensor.h" #include "mace/ops/activation.h" #include "mace/ops/arm/deconv_2d_neon.h" +#include "mace/utils/memory.h" #include 
"mace/utils/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -362,7 +363,7 @@ class Deconv2dOp : public Deconv2dOpBase { : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::Deconv2dKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index ed9cdb539445b17810eaa685135ad12fbfc1a3ba..2460d75a258068c4e0f08576311bf93ace6b3289 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/depth_to_space.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -97,7 +98,7 @@ class DepthToSpaceOp : public Operation { : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::DepthToSpaceKernel(block_size)); + kernel_ = make_unique>(block_size); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index c61f13049d51a6ce6c3fe624c345052316f4a6d3..22130afc966c4141e8ee18245c78a9b2cbb12afc 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -33,6 +33,7 @@ #include "mace/ops/arm/depthwise_conv2d_neon.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/public/mace.h" +#include "mace/utils/memory.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -493,10 +494,10 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { MemoryType mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::DepthwiseConv2dKernel); + kernel_ = make_unique>(); } else { mem_type = MemoryType::GPU_BUFFER; - kernel_.reset(new opencl::buffer::DepthwiseConv2dKernel); + kernel_ = make_unique>(); } context->set_output_mem_type(mem_type); // Transform filter tensor to target format diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc index 06c55ab27a2f831bb681bb3ef2c39d96b44922b1..5a0033d81fca2201e02e928eac7a9add0e6a332b 100644 --- a/mace/ops/depthwise_deconv2d.cc +++ b/mace/ops/depthwise_deconv2d.cc @@ -28,6 +28,7 @@ #include "mace/ops/arm/depthwise_deconv2d_neon.h" #include "mace/utils/utils.h" #include "mace/public/mace.h" +#include "mace/utils/memory.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/image/depthwise_deconv2d.h" @@ -412,7 +413,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase { : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::DepthwiseDeconv2dKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index d345427a2a3a6258d90f3a55c71f1b8d8004419b..cbce427ca46cae60e64732464ed71cc9f10e0b1e 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -30,6 +30,7 @@ #include "mace/core/future.h" #include "mace/core/operator.h" #include "mace/core/tensor.h" +#include "mace/utils/memory.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/buffer_transformer.h" @@ -1160,8 +1161,8 @@ class EltwiseOp : public Operation { MemoryType 
mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::EltwiseKernel( - type, coeff, scalar_input, scalar_input_index)); + kernel_ = make_unique>( + type, coeff, scalar_input, scalar_input_index); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index c82aa8ff5332c850b70100b97b0c6c1cfb3c33d3..22d45ea7c5de05eff05f2ad1fa30c9bcd92f6b7d 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -38,6 +38,8 @@ #include "mace/ops/opencl/image/fully_connected.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" + namespace mace { namespace ops { @@ -186,7 +188,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { MemoryType mem_type; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; - kernel_.reset(new opencl::image::FullyConnectedKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/lstm_cell.cc b/mace/ops/lstm_cell.cc index bc5af8f50a7f6ea44f42748ad84a577ac526376d..82ed9053b6d05a40c2e31e6854c0ec16c62f7ae8 100644 --- a/mace/ops/lstm_cell.cc +++ b/mace/ops/lstm_cell.cc @@ -18,6 +18,7 @@ #include "mace/core/operator.h" #include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/image/lstm_cell.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -36,7 +37,7 @@ class LSTMCellOp : public Operation { 0.0)); MemoryType mem_type = MemoryType::GPU_IMAGE; if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::LSTMCellKernel(forget_bias)); + kernel_ = make_unique>(forget_bias); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/opencl/buffer/conv_2d.h b/mace/ops/opencl/buffer/conv_2d.h index 736ecb2a420af7941490224b6f0c390abbb3bac9..4ef8d79d9304143d29ba35125ad0b0970af310cb 100644 --- a/mace/ops/opencl/buffer/conv_2d.h +++ b/mace/ops/opencl/buffer/conv_2d.h @@ -22,6 +22,7 @@ #include "mace/ops/opencl/buffer/utils.h" #include "mace/ops/opencl/helper.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -211,8 +212,8 @@ MaceStatus Conv2dKernel::Compute( old_scratch_size_ = scratch->size(); } - padded_input.reset(new Tensor(scratch->Scratch(padded_input_size), - input->dtype())); + padded_input = make_unique(scratch->Scratch(padded_input_size), + input->dtype()); padded_input->Resize(padded_input_shape); PadInput(context, &kernels_[0], input, pad_top, pad_left, diff --git a/mace/ops/opencl/buffer/depthwise_conv2d.h b/mace/ops/opencl/buffer/depthwise_conv2d.h index 74a3cb945158382fb9b546cdfee6d0091c1892c7..6a46334a787378441d84d020cf578042e6bd24b9 100644 --- a/mace/ops/opencl/buffer/depthwise_conv2d.h +++ b/mace/ops/opencl/buffer/depthwise_conv2d.h @@ -22,6 +22,7 @@ #include "mace/ops/opencl/buffer/utils.h" #include "mace/ops/opencl/helper.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -165,8 +166,8 @@ MaceStatus DepthwiseConv2dKernel::Compute( old_scratch_size_ = scratch->size(); } - padded_input.reset(new Tensor(scratch->Scratch(padded_input_size), - input->dtype())); + padded_input = make_unique(scratch->Scratch(padded_input_size), + input->dtype()); padded_input->Resize(padded_input_shape); PadInput(context, &kernels_[0], input, pad_top, pad_left, diff --git a/mace/ops/opencl/buffer/pooling.h b/mace/ops/opencl/buffer/pooling.h index ab1e6f85929298483339944d7eb97d0781023a04..4f153e4acfff75ab179e567803e05e14f67ceebf 100644 --- 
a/mace/ops/opencl/buffer/pooling.h +++ b/mace/ops/opencl/buffer/pooling.h @@ -24,6 +24,7 @@ #include "mace/ops/opencl/buffer/utils.h" #include "mace/ops/opencl/helper.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -124,8 +125,8 @@ MaceStatus PoolingKernel::Compute( old_scratch_size_ = scratch->size(); } - padded_input.reset(new Tensor(scratch->Scratch(padded_input_size), - input->dtype())); + padded_input = make_unique(scratch->Scratch(padded_input_size), + input->dtype()); padded_input->Resize(padded_input_shape); PadInput(context, &kernels_[0], input, 0, 0, diff --git a/mace/ops/opencl/buffer_transformer.h b/mace/ops/opencl/buffer_transformer.h index e65ae3701efe51068bb81a39e533f170502c792e..954b31f9c676ec61bf7db08caea3d577833478e6 100644 --- a/mace/ops/opencl/buffer_transformer.h +++ b/mace/ops/opencl/buffer_transformer.h @@ -24,6 +24,7 @@ #include "mace/ops/opencl/image/image_to_buffer.h" #include "mace/ops/opencl/buffer/buffer_transform.h" #include "mace/ops/common/transpose.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -34,11 +35,11 @@ class OpenCLBufferTransformer { OpenCLBufferTransformer(const MemoryType in_mem_type, const MemoryType out_mem_type) { if (out_mem_type == MemoryType::GPU_IMAGE) { - kernel_.reset(new opencl::image::BufferToImage); + kernel_ = make_unique>(); } else if (in_mem_type == MemoryType::GPU_IMAGE) { - kernel_.reset(new opencl::image::ImageToBuffer); + kernel_ = make_unique>(); } else { - kernel_.reset(new opencl::buffer::BufferTransform); + kernel_ = make_unique>(); } } diff --git a/mace/ops/opencl/helper.h b/mace/ops/opencl/helper.h index 33ea688b51ab9cbc958af1e489959681061c3239..cf3beeac9d304b6530c29c395979527b33cc326c 100644 --- a/mace/ops/opencl/helper.h +++ b/mace/ops/opencl/helper.h @@ -26,6 +26,7 @@ #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_util.h" #include "mace/core/types.h" +#include "mace/utils/memory.h" #include "mace/utils/utils.h" namespace mace { @@ -41,8 +42,8 @@ namespace ops { #define MACE_OUT_OF_RANGE_INIT(kernel) \ if (runtime->IsOutOfRangeCheckEnabled()) { \ - oorc_flag = std::move(std::unique_ptr( \ - new Buffer((context)->device()->allocator()))); \ + oorc_flag = make_unique( \ + (context)->device()->allocator()); \ MACE_RETURN_IF_ERROR((oorc_flag)->Allocate(sizeof(int)));\ oorc_flag->Map(nullptr); \ *(oorc_flag->mutable_data()) = 0; \ diff --git a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index 527d6cc87f0b8e5023100a9d403f363d66db5871..0bda447005a413e2503a096cd18defe4a181ce0e 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -17,6 +17,7 @@ #include "mace/ops/common/activation_type.h" #include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/opencl/helper.h" +#include "mace/utils/memory.h" #include "mace/utils/utils.h" namespace mace { @@ -264,9 +265,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, OpenCLBufferType::IN_OUT_HEIGHT, &t_input_image_shape); ScratchImage transformed_input_image(scratch_manager); - std::unique_ptr transformed_input(new Tensor( + std::unique_ptr transformed_input = make_unique( transformed_input_image.Scratch(context->device()->allocator(), - t_input_image_shape, dt), dt)); + t_input_image_shape, dt), dt); MACE_RETURN_IF_ERROR(transformed_input->ResizeImage(t_input_shape, t_input_image_shape)); MACE_RETURN_IF_ERROR(WinogradInputTransform( @@ -289,9 +290,9 @@ extern MaceStatus 
WinogradConv2dK3x3S1(OpContext *context, &mm_output_image_shape); ScratchImage mm_output_image(scratch_manager); - std::unique_ptr mm_output(new Tensor( + std::unique_ptr mm_output = make_unique( mm_output_image.Scratch(context->device()->allocator(), - mm_output_image_shape, dt), dt)); + mm_output_image_shape, dt), dt); MACE_RETURN_IF_ERROR(mm_output->ResizeImage(mm_output_shape, mm_output_image_shape)); diff --git a/mace/ops/opencl/out_of_range_check_test.cc b/mace/ops/opencl/out_of_range_check_test.cc index 61e19808d1dad91045876e75e9b525c042c78427..8909f35113c5a77d78cf614970d9d027019f111c 100644 --- a/mace/ops/opencl/out_of_range_check_test.cc +++ b/mace/ops/opencl/out_of_range_check_test.cc @@ -22,6 +22,7 @@ #include "mace/core/tensor.h" #include "mace/core/workspace.h" #include "mace/ops/opencl/helper.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -130,7 +131,8 @@ TEST(OutOfRangeCheckTest, RandomTest) { index_t channels = 11; GPUContext gpu_context; - std::unique_ptr device(new GPUDevice(gpu_context.opencl_tuner())); + std::unique_ptr device = make_unique( + gpu_context.opencl_tuner()); Workspace ws; OpContext context(&ws, device.get()); diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index ce9c1bbde07ddd8857f33718f06eb47d1fb34fa9..79139bcf27775ef4d4d4124ab7f60eb9b54aac30 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -14,6 +14,7 @@ #include "mace/ops/ops_test_util.h" #include "mace/core/memory_optimizer.h" +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -120,17 +121,15 @@ OpTestContext *OpTestContext::Get(int num_threads, OpTestContext::OpTestContext(int num_threads, CPUAffinityPolicy cpu_affinity_policy, bool use_gemmlowp) - : gpu_context_(new GPUContext(GetStoragePathFromEnv())), + : gpu_context_(std::make_shared(GetStoragePathFromEnv())), opencl_mem_types_({MemoryType::GPU_IMAGE}) { - device_map_[DeviceType::CPU] = std::unique_ptr( - new CPUDevice(num_threads, - cpu_affinity_policy, - use_gemmlowp)); - - device_map_[DeviceType::GPU] = std::unique_ptr( - new GPUDevice(gpu_context_->opencl_tuner(), - gpu_context_->opencl_cache_storage(), - GPUPriorityHint::PRIORITY_NORMAL)); + device_map_[DeviceType::CPU] = make_unique( + num_threads, cpu_affinity_policy, use_gemmlowp); + + device_map_[DeviceType::GPU] = make_unique( + gpu_context_->opencl_tuner(), + gpu_context_->opencl_cache_storage(), + GPUPriorityHint::PRIORITY_NORMAL); } std::shared_ptr OpTestContext::gpu_context() const { @@ -189,12 +188,12 @@ bool OpsTestNet::Setup(mace::DeviceType device) { } } MemoryOptimizer mem_optimizer; - net_ = std::unique_ptr(new SerialNet( + net_ = make_unique( op_registry_.get(), &net_def, &ws_, OpTestContext::Get()->GetDevice(device), - &mem_optimizer)); + &mem_optimizer); MaceStatus status = (ws_.PreallocateOutputTensor( net_def, &mem_optimizer, @@ -236,12 +235,12 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def, const mace::DeviceType device) { device_type_ = device; MemoryOptimizer mem_optimizer; - net_ = std::unique_ptr(new SerialNet( + net_ = make_unique( op_registry_.get(), &net_def, &ws_, OpTestContext::Get()->GetDevice(device), - &mem_optimizer)); + &mem_optimizer); MACE_RETURN_IF_ERROR(ws_.PreallocateOutputTensor( net_def, &mem_optimizer, diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 07cbad06bdb57381ca3befada4baf1e1f11b5bed..f60fff1d39cb86e51b8c63dae1f94fc4089efb41 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -34,6 +34,7 @@ #include 
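// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch) for the ops_test_util.cc hunk
// above: the shared gpu_context_ member switches to std::make_shared, while
// the per-device map entries use make_unique<CPUDevice>/<GPUDevice> even
// though the map stores base-class pointers; that works because
// std::unique_ptr<Derived> converts implicitly to std::unique_ptr<Base>.
// Base and Impl below are simplified stand-ins, not the real MACE classes.
// ----------------------------------------------------------------------------
#include <map>
#include <memory>

struct Base {
  virtual ~Base() = default;
};
struct Impl : Base {
  explicit Impl(int threads) : threads_(threads) {}
  int threads_;
};

int main() {
  // Shared ownership: make_shared allocates the object and its control block
  // in one allocation.
  std::shared_ptr<Base> context = std::make_shared<Impl>(4);

  // Unique ownership: unique_ptr<Impl> converts to unique_ptr<Base> on assign.
  std::map<int, std::unique_ptr<Base>> device_map;
  device_map[0] = std::make_unique<Impl>(4);
  return 0;
}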
"mace/core/workspace.h" #include "mace/ops/ops_registry.h" #include "mace/public/mace.h" +#include "mace/utils/memory.h" #include "mace/utils/utils.h" #include "mace/utils/quantize.h" #include "mace/ops/testing/test_utils.h" @@ -97,7 +98,7 @@ class OpTestContext { class OpsTestNet { public: OpsTestNet() : - op_registry_(new OpRegistry()) {} + op_registry_(make_unique()) {} template void AddInputFromArray(const std::string &name, @@ -355,9 +356,9 @@ class OpsTestNet { std::unique_ptr CreateTensor( const std::vector &shape = {}, const std::vector &data = {}) { - std::unique_ptr res( - new Tensor(OpTestContext::Get()->GetDevice(D)->allocator(), - DataTypeToEnum::v())); + std::unique_ptr res = make_unique( + OpTestContext::Get()->GetDevice(D)->allocator(), + DataTypeToEnum::v()); if (!data.empty()) { res->Resize(shape); T *input_data = res->mutable_data(); diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index 0dfdf673b21f49ce231030251ed78004971e0b3f..ad1ded81ff1e2c70b59bb5028ff704b4c615c72a 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -20,6 +20,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/pad.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -182,8 +183,8 @@ class PadOp : public Operation { float constant_value = Operation::GetOptionalArg( "constant_value", 0.0); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::PadKernel( - type, paddings, constant_value)); + kernel_ = make_unique>( + type, paddings, constant_value); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 8fd87cdfa38771a56636fd7bd54894ea1cbe042e..969f2774e3bb5a5fcf35e37e5f613f2f87b9f19b 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -32,6 +32,7 @@ #include "mace/ops/opencl/image/pooling.h" #include "mace/ops/opencl/buffer/pooling.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -433,10 +434,10 @@ class PoolingOp : public PoolingOpBase { explicit PoolingOp(OpConstructContext *context) : PoolingOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::PoolingKernel); + kernel_ = make_unique>(); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); - kernel_.reset(new opencl::buffer::PoolingKernel); + kernel_ = make_unique>(); } } MaceStatus Run(OpContext *context) override { diff --git a/mace/ops/reduce.cc b/mace/ops/reduce.cc index f4a147cc7b8191f5323cf38acd532830a44948c9..e4726987025a65560d5768746afae68298d98b9c 100644 --- a/mace/ops/reduce.cc +++ b/mace/ops/reduce.cc @@ -25,6 +25,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/reduce.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -847,9 +848,9 @@ class ReduceOp : public ReduceOpBase { explicit ReduceOp(OpConstructContext *context) : ReduceOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ReduceKernel(reduce_type_, - axis_, - keep_dims_)); + kernel_ = make_unique>(reduce_type_, + axis_, + keep_dims_); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 9334e850fa214ab710969e7f5e7b3e28f17b303d..236e670f1d26b97471e219ba746102d777a008b5 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -23,6 +23,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_bicubic.h" #endif // 
MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -197,9 +198,8 @@ class ResizeBicubicOp : public Operation { "size", {-1, -1}); MACE_CHECK(size.size() == 2); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ResizeBicubicKernel(align_corners, - size[0], - size[1])); + kernel_ = make_unique>( + align_corners, size[0], size[1]); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index e4c2f3fc3c64bb08410c709bd2f8b405363dcdd5..46720b3c29d32d01f82902a0bfcc49071aa6aa2a 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -19,6 +19,7 @@ #include #include "mace/core/operator.h" +#include "mace/utils/memory.h" #include "mace/utils/quantize.h" #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_bilinear.h" @@ -332,9 +333,8 @@ class ResizeBilinearOp : public Operation { "size", {-1, -1}); MACE_CHECK(size.size() == 2); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ResizeBilinearKernel(align_corners, - size[0], - size[1])); + kernel_ = make_unique>( + align_corners, size[0], size[1]); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc index c40fd46dce86d382df5dec340fbd66cf143f782d..5cdbf07fa101881c4b1c5a4b66476a01199cacee 100644 --- a/mace/ops/resize_nearest_neighbor.cc +++ b/mace/ops/resize_nearest_neighbor.cc @@ -22,6 +22,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/resize_nearest_neighbor.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -142,8 +143,8 @@ class ResizeNearestNeighborOp : public Operation { bool align_corners = Operation::GetOptionalArg( "align_corners", false); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::ResizeNearestNeighborKernel( - align_corners)); + kernel_ = make_unique>( + align_corners); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/sgemm.cc b/mace/ops/sgemm.cc index 1601aac2cd774d9b35406d30dceea56e27469c93..b96c4f0f2b43799da41bb45636b8dddd4197dfd9 100644 --- a/mace/ops/sgemm.cc +++ b/mace/ops/sgemm.cc @@ -18,6 +18,7 @@ #include "mace/ops/sgemm.h" #include "mace/core/runtime/cpu/cpu_runtime.h" +#include "mace/utils/memory.h" #if defined(MACE_ENABLE_NEON) #include @@ -55,27 +56,27 @@ void SGemm::operator()(const SGemmMatrixMap &lhs, scratch_buffer->GrowSize(total_size * sizeof(float)); if (!lhs.is_const()) { - packed_lhs_.reset(new Tensor(scratch_buffer->Scratch( - lhs.size() * sizeof(float)), DT_FLOAT)); + packed_lhs_ = make_unique(scratch_buffer->Scratch( + lhs.size() * sizeof(float)), DT_FLOAT); } if (!rhs.is_const()) { - packed_rhs_.reset(new Tensor(scratch_buffer->Scratch( - rhs.size() * sizeof(float)), DT_FLOAT)); + packed_rhs_ = make_unique(scratch_buffer->Scratch( + rhs.size() * sizeof(float)), DT_FLOAT); } - packed_result_.reset(new Tensor(scratch_buffer->Scratch( - result->size() * sizeof(float)), DT_FLOAT)); + packed_result_ = make_unique(scratch_buffer->Scratch( + result->size() * sizeof(float)), DT_FLOAT); } if (packed_lhs_.get() == nullptr) { - packed_lhs_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT)); + packed_lhs_ = make_unique(GetCPUAllocator(), DT_FLOAT); packed_lhs_->Resize({lhs.size()}); } if (packed_rhs_.get() == nullptr) { - packed_rhs_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT)); + packed_rhs_ = make_unique(GetCPUAllocator(), DT_FLOAT); 
packed_rhs_->Resize({rhs.size()}); } if (packed_result_.get() == nullptr) { - packed_result_.reset(new Tensor(GetCPUAllocator(), DT_FLOAT)); + packed_result_ = make_unique(GetCPUAllocator(), DT_FLOAT); packed_result_->Resize({result->size()}); } diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index b407ac34c357a6e81295007ee946ff61e0c18b7b..693058a372198f7eb0848ef84e9aab78e4e2645c 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -30,6 +30,8 @@ #include "mace/ops/opencl/buffer/softmax.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" + namespace mace { namespace ops { @@ -374,10 +376,10 @@ class SoftmaxOp : public Operation { explicit SoftmaxOp(OpConstructContext *context) : Operation(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::SoftmaxKernel); + kernel_ = make_unique>(); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); - kernel_.reset(new opencl::buffer::SoftmaxKernel); + kernel_ = make_unique>(); } } MaceStatus Run(OpContext *context) override { diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index e1da96664abe010a84bd287cc9b2cd940ed7e736..ece9b6f61dd25e0fe4c6d2f5aff1aeea4ed55302 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_batch.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -309,7 +310,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { explicit SpaceToBatchNDOp(OpConstructContext *context) : SpaceToBatchOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::SpaceToBatchKernel); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index fb98de71dd118448d02c64f06fb1a79f9d3a8302..4e40227c5b5857d065195d509bcafe55fbef1c59 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/space_to_depth.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -95,7 +96,7 @@ class SpaceToDepthOp : public Operation { : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::SpaceToDepthKernel(block_size)); + kernel_ = make_unique>(block_size); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/split.cc b/mace/ops/split.cc index 7fe05be1edf474cc92ee8c049f27e8a265ca7219..1ac77cfbec2befb4e1edbd8568ffaf5aa218ce79 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include "mace/ops/opencl/image/split.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -108,7 +109,7 @@ class SplitOp : public Operation { : Operation(context) { int32_t axis = Operation::GetOptionalArg("axis", 3); if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::SplitKernel(axis)); + kernel_ = make_unique>(axis); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/ops/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc index 1bd8a2e33e872715f57b712102643b411b142fbb..b937b259322615abcbb929e4c17c0f41e3844167 100644 --- a/mace/ops/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -19,6 +19,7 @@ #ifdef MACE_ENABLE_OPENCL #include 
"mace/ops/opencl/image/sqrdiff_mean.h" #endif // MACE_ENABLE_OPENCL +#include "mace/utils/memory.h" namespace mace { namespace ops { @@ -83,7 +84,7 @@ class SqrDiffMeanOp : public Operation { explicit SqrDiffMeanOp(OpConstructContext *context) : Operation(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::SqrDiffMeanKernel()); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/utils/memory.h b/mace/utils/memory.h new file mode 100644 index 0000000000000000000000000000000000000000..41a898ef48fd712ce65191f967565531a4afdd89 --- /dev/null +++ b/mace/utils/memory.h @@ -0,0 +1,74 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_UTILS_MEMORY_H_ +#define MACE_UTILS_MEMORY_H_ + +#include +#include + +namespace mace { + +namespace memory_internal { + +// Traits to select proper overload and return type for `make_unique<>`. +template +struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using invalid = void; +}; + +} // namespace memory_internal + +// gcc 4.8 has __cplusplus at 201301 but doesn't define make_unique. Other +// supported compilers either just define __cplusplus as 201103 but have +// make_unique (msvc), or have make_unique whenever __cplusplus > 201103 (clang) +#if (__cplusplus > 201103L || defined(_MSC_VER)) && \ + !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8) +using std::make_unique; +#else + +// `make_unique` overload for non-array types. +template +typename memory_internal::MakeUniqueResult::scalar make_unique( + Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + +// `make_unique` overload for an array T[] of unknown bounds. +// The array allocation needs to use the `new T[size]` form and cannot take +// element constructor arguments. The `std::unique_ptr` will manage destructing +// these array elements. +template +typename memory_internal::MakeUniqueResult::array make_unique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +// `make_unique` overload for an array T[N] of known bounds. +// This construction will be rejected. +template +typename memory_internal::MakeUniqueResult::invalid make_unique( + Args&&... /* args */) = delete; +#endif + +} // namespace mace + +#endif // MACE_UTILS_MEMORY_H_