diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcb35070d725865a10fc981943b2dc9b18084b53
--- /dev/null
+++ b/mace/core/preallocated_pooled_allocator.h
@@ -0,0 +1,41 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
+#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
+
+#include <cstddef>
+#include <vector>
+
+#include "mace/core/allocator.h"
+
+namespace mace {
+
+// Abstract interface for images that are preallocated under an integer
+// memory id and later queried by that id.
+class PreallocatedPooledAllocator {
+ public:
+  PreallocatedPooledAllocator() {}
+
+  virtual ~PreallocatedPooledAllocator() noexcept {}
+
+  // Allocates an image of `image_shape` holding `data_type` elements and
+  // registers it under `mem_id`.
+  virtual void PreallocateImage(int mem_id,
+                                const std::vector<size_t> &image_shape,
+                                DataType data_type) = 0;
+
+  // Returns the image registered under `mem_id`.
+  virtual void *GetImage(int mem_id) = 0;
+
+  // Returns true if an image is registered under `mem_id`.
+  virtual bool HasImage(int mem_id) = 0;
+
+  // Returns the shape of the image registered under `mem_id`.
+  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
+};
+
+} // namespace mace
+
+#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f854b9c5c6b19c194b0f10ee215aac04904545fb
--- /dev/null
+++ b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
@@ -0,0 +1,40 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
+
+namespace mace {
+
+// Binds the pool to the OpenCL device allocator; every image is created and
+// released through it.
+OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
+  : allocator(GetDeviceAllocator(DeviceType::OPENCL)) {
+}
+
+OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() {
+}
+
+// Allocates an image of `image_shape`/`data_type` and stores it, together
+// with its shape, under `mem_id`. Fails the check if `mem_id` is already in
+// use or if `image_shape` does not have exactly two dimensions.
+void OpenCLPreallocatedPooledAllocator::PreallocateImage(
+    int mem_id,
+    const std::vector<size_t> &image_shape,
+    DataType data_type) {
+  MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
+  // The shape is indexed as [0], [1] below, so validate its rank up front.
+  MACE_CHECK(image_shape.size() == 2,
+             "Image shape must have 2 dimensions, got ", image_shape.size());
+  VLOG(3) << "Preallocate OpenCL image: " << mem_id << " "
+          << image_shape[0] << ", " << image_shape[1];
+  // Capture the allocator pointer by value so the deleter does not depend on
+  // `this` staying valid.
+  Allocator *image_allocator = allocator;
+  images_[mem_id] = std::unique_ptr<void, std::function<void(void *)>>(
+    allocator->NewImage(image_shape, data_type),
+    [image_allocator](void *p) { image_allocator->DeleteImage(p); });
+  image_shapes_[mem_id] = image_shape;
+}
+
+} // namespace mace
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..960ff174e4d6706fcc42f0304ca273c3c96c58c1
--- /dev/null
+++ b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
@@ -0,0 +1,62 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
+#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
+
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "mace/core/preallocated_pooled_allocator.h"
+
+namespace mace {
+
+// PreallocatedPooledAllocator that keeps preallocated images and their
+// shapes in maps keyed by memory id.
+class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
+ public:
+  OpenCLPreallocatedPooledAllocator();
+
+  ~OpenCLPreallocatedPooledAllocator() override;
+
+  void PreallocateImage(int mem_id,
+                        const std::vector<size_t> &image_shape,
+                        DataType data_type) override;
+
+  // Returns the image stored under `mem_id`; the id must have been
+  // preallocated. The pool keeps ownership of the returned pointer.
+  inline void *GetImage(int mem_id) override {
+    auto image_iter = images_.find(mem_id);
+    MACE_CHECK(image_iter != images_.end(), "image does not exist");
+    return image_iter->second.get();
+  }
+
+  inline bool HasImage(int mem_id) override {
+    return images_.find(mem_id) != images_.end();
+  }
+
+  // Returns the shape stored for `mem_id`. Looks the id up with find() rather
+  // than operator[], which would silently insert an empty shape for an
+  // unknown id.
+  inline std::vector<size_t> GetImageSize(int mem_id) override {
+    auto shape_iter = image_shapes_.find(mem_id);
+    MACE_CHECK(shape_iter != image_shapes_.end(), "image does not exist");
+    return shape_iter->second;
+  }
+
+ private:
+  // Images owned by the pool, each stored with its own deleter.
+  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
+    images_;
+  // Shape of each stored image, keyed by the same memory id.
+  std::unordered_map<int, std::vector<size_t>> image_shapes_;
+  Allocator *allocator;
+};
+
+} // namespace mace
+
+#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 62d368974e024e8b89aec7019e6fd04d512c2ca2..83961e1058b86b5b8832cd9c443a79d9f79c2ef8 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -10,6 +10,7 @@
 #include "mace/utils/logging.h"
 #include "mace/core/types.h"
 #include "mace/core/public/mace.h"
+#include "preallocated_pooled_allocator.h"
 
 namespace mace {
 
@@ -71,7 +72,7 @@ class Tensor {
         buffer_(nullptr),
         data_(nullptr),
         unused_(false),
-        is_image_(false){};
+        is_image_(false) {}
 
   Tensor(Allocator *alloc, DataType type)
       : alloc_(alloc),
@@ -80,18 +82,10 @@ class Tensor {
         buffer_(nullptr),
         data_(nullptr),
         unused_(false),
-        is_image_(false){};
+        is_image_(false) {}
 
   ~Tensor() {
-    MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before destroy");
-    if (buffer_ != nullptr) {
-      MACE_CHECK_NOTNULL(alloc_);
-      if (is_image_) {
-        alloc_->DeleteImage(buffer_);
-      } else {
-        alloc_->Delete(buffer_);
-      }
-    }
+    // Nothing to release by hand: buffer_ runs its own deleter on destruction.
   }
 
   inline DataType dtype() const { return dtype_; }
@@ -132,13 +126,13 @@ class Tensor {
   inline void Map() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
-      data_ = alloc_->Map(buffer_, size_ * SizeOfType());
+      data_ = alloc_->Map(buffer(), size_ * SizeOfType());
     }
   }
 
   inline void MapImage(std::vector<size_t> &mapped_image_pitch) const {
     MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
-    data_ = alloc_->MapImage(buffer_, image_shape_, mapped_image_pitch);
+    data_ = alloc_->MapImage(buffer(), image_shape_, mapped_image_pitch);
   }
 
   /*
@@ -147,12 +141,12 @@ class Tensor {
   inline void Unmap() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
-      alloc_->Unmap(buffer_, data_);
+      alloc_->Unmap(buffer(), data_);
       data_ = nullptr;
     }
   }
 
-  void *buffer() const { return buffer_; }
+  void *buffer() const { return buffer_.get(); }  // raw, non-owning handle
 
   inline const void *raw_data() const {
     void *data = MappedBuffer();
@@ -181,42 +175,59 @@ class Tensor {
   }
 
+  // Resizes a buffer-backed tensor to `shape`; a new buffer is allocated
+  // whenever the element count changes. Must not be called on a tensor that
+  // already holds an image.
   inline void Resize(const vector<index_t> &shape) {
+    MACE_CHECK(!is_image_ || buffer_ == nullptr,
+               "Resize is not for image, use ResizeImage instead.");
+    is_image_ = false;
     shape_ = shape;
     index_t size = NumElements();
-    if (size_ != size || is_image_) {
+    if (size_ != size) {
       size_ = size;
       MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
-      if (is_image_) {
-        alloc_->DeleteImage(buffer_);
-      } else {
-        alloc_->Delete(buffer_);
-      }
-      is_image_ = false;
-      CASES(dtype_, buffer_ = alloc_->New(size_ * sizeof(T)));
+      // Assigning the new owner releases the previous buffer through its
+      // deleter. The parentheses around the assignment keep the comma in the
+      // template argument list from splitting the macro argument.
+      CASES(dtype_,
+            (buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+               alloc_->New(size_ * sizeof(T)),
+               [this](void *p) { this->alloc_->Delete(p); })));
     }
   }
 
+  // Resizes an image-backed tensor to `shape` with image extent `image_shape`.
+  // An image attached earlier is reused and must be large enough; if none is
+  // attached yet, one of `image_shape` is allocated. Must not be called on a
+  // tensor that already holds a plain buffer.
   inline void ResizeImage(const vector<index_t> &shape,
                           const std::vector<size_t> &image_shape) {
+    MACE_CHECK(is_image_ || buffer_ == nullptr,
+               "ResizeImage is not for buffer, use Resize instead.");
+    is_image_ = true;
     shape_ = shape;
     index_t size = NumElements();
-    if (size_ != size || !is_image_) {
+    // Also take this path when only the image extent changes, so the capacity
+    // check below is never skipped and image_shape_ never goes stale.
+    if (size_ != size || image_shape_ != image_shape) {
       size_ = size;
-      MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
-
-      if (is_image_ && !image_shape_.empty()) {
-        MACE_ASSERT(image_shape_.size() == 2
-                        && image_shape_[0] >= image_shape[0]
-                        || image_shape_[1] >= image_shape[1],
-                    "image shape not large enough");
-      }
-      if (!is_image_ && buffer_ != nullptr) {
-        alloc_->Delete(buffer_);
-      }
-      is_image_ = true;
-      if (image_shape_.empty()) {
-        image_shape_ = image_shape;
-        buffer_ = alloc_->NewImage(image_shape, dtype_);
+      image_shape_ = image_shape;
+      if (!preallocated_image_shape_.empty()) {
+        MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
+                     && preallocated_image_shape_[1] >= image_shape[1],
+                   "image shape not large enough: preallocated ",
+                   preallocated_image_shape_[0],
+                   " ",
+                   preallocated_image_shape_[1],
+                   " apply for ",
+                   image_shape[0],
+                   " ",
+                   image_shape[1]);
+      } else {
+        buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+          alloc_->NewImage(image_shape, dtype_),
+          [this](void *p) { this->alloc_->DeleteImage(p); });
+        preallocated_image_shape_ = image_shape;
       }
     }
   }
@@ -237,15 +240,21 @@ class Tensor {
     }
   }
 
-  inline void AllocateImageMemory(const std::vector<size_t> &image_shape) {
+  // Points this tensor at an externally owned image whose extent is
+  // `image_shape`. The tensor only borrows `image`: the installed deleter is a
+  // no-op, so the caller must keep the image alive while the tensor uses it.
+  inline void PreallocateImage(void *image,
+                               const std::vector<size_t> &image_shape) {
+    MACE_CHECK(image != nullptr, "preallocated image must not be null");
+    // ResizeImage() indexes the preallocated shape as [0], [1].
+    MACE_CHECK(image_shape.size() == 2,
+               "preallocated image shape must have 2 dimensions");
     is_image_ = true;
-    if (image_shape_ != image_shape) {
-      if (buffer_ != nullptr) {
-        alloc_->DeleteImage(buffer_);
-      }
-      image_shape_ = image_shape;
-      buffer_ = alloc_->NewImage(image_shape, dtype_);
-    }
+    buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+      image, [](void *) {
+        // tensor does not have ownership of preallocated memory
+      });
+    preallocated_image_shape_ = image_shape;
   }
 
   template <typename T>
@@ -273,7 +275,7 @@ class Tensor {
   inline void DebugPrint() const {
     using namespace numerical_chars;
     std::stringstream os;
-    for (int i : shape_) {
+    for (index_t i : shape_) {
       os << i << ", ";
     }
 
@@ -336,7 +338,7 @@ class Tensor {
  private:
   inline void *MappedBuffer() const {
     if (OnHost()) {
-      return buffer_;
+      return buffer();
     }
     return data_;
   }
@@ -346,7 +348,7 @@ class Tensor {
   DataType dtype_;
   // Raw buffer, must be mapped as host accessable data before
   // read or write
-  void *buffer_;
+  std::unique_ptr<void, std::function<void(void*)>> buffer_;
   // Mapped buffer
   mutable void *data_;
   vector<index_t> shape_;
@@ -354,6 +356,7 @@ class Tensor {
   bool unused_;
   bool is_image_;
   std::vector<size_t> image_shape_;
+  std::vector<size_t> preallocated_image_shape_;
 
   DISABLE_COPY_AND_ASSIGN(Tensor);
 };
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 140986a4492fa1f59fe5f2467462ff3a9bd7581c..6d3916d25ba1b20e94d1572937179f0e3a76db7e 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -5,6 +5,7 @@
 #include "mace/core/workspace.h"
 #include "mace/core/serializer.h"
 #include "mace/core/arg_helper.h"
+#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
 
 namespace mace {
 
@@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name,
     VLOG(1) << "Tensor " << name << " already exists. Skipping.";
   } else {
     VLOG(1) << "Creating Tensor " << name;
-    tensor_map_[name] = unique_ptr<Tensor>(new Tensor(alloc, type));
+    tensor_map_[name] = std::unique_ptr<Tensor>(new Tensor(alloc, type));
   }
   return GetTensor(name);
 }
@@ -84,25 +85,46 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
   if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
     return;
   }
-  std::map<std::string, std::shared_ptr<Tensor>> mem_tensor_map;
-  const DataType dtype = static_cast<DataType>(
-      ArgumentHelper::GetSingleArgument<OperatorDef, int>(
-          net_def.op(0),
+  // The pool owns the images; the tensors created below only borrow them.
+  preallocated_allocator_.reset(new OpenCLPreallocatedPooledAllocator);
+
+  DataType dtype = DataType::DT_INVALID;
+  // We use the data type of the first op (with mem id, must be image),
+  // as GPU have consistent data type for each layer for now.
+  // As DSP may have different data output type for each op,
+  // we stick to the same concept.
+  for (auto &op: net_def.op()) {
+    if (op.has_mem_id()) {
+      const DataType op_dtype = static_cast<DataType>(
+        ArgumentHelper::GetSingleArgument<OperatorDef, int>(
+          op,
           "T",
           static_cast<int>(DT_FLOAT)));
+      if (op_dtype != DataType::DT_INVALID) {
+        dtype = op_dtype;
+        // find first valid data type, break
+        break;
+      }
+    }
+  }
+  MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
   for (auto &mem_block: net_def.mem_arena().mem_block()) {
-    string mem_block_name = MemBlockName(mem_block.mem_id());
-    mem_tensor_map[mem_block_name].reset(new Tensor(
-        GetDeviceAllocator(DeviceType::OPENCL),
-        dtype));
-    mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(),
-                                                         mem_block.y()});
+    preallocated_allocator_->PreallocateImage(mem_block.mem_id(),
+                                              {mem_block.x(), mem_block.y()},
+                                              dtype);
   }
+  VLOG(1) << "Preallocate image to tensors";
+  auto allocator = GetDeviceAllocator(DeviceType::OPENCL);
   for (auto &op: net_def.op()) {
     if (op.has_mem_id()) {
-      tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())];
+      // CreateTensor() returns the tensor, so no second map lookup is needed.
+      Tensor *tensor = CreateTensor(op.output(0), allocator, dtype);
+      tensor->PreallocateImage(
+        preallocated_allocator_->GetImage(op.mem_id()),
+        preallocated_allocator_->GetImageSize(op.mem_id()));
     }
   }
 }
 
-}  // namespace mace
\ No newline at end of file
+}  // namespace mace
diff --git a/mace/core/workspace.h b/mace/core/workspace.h
index 5e0b4ace3393b25edbddbf224ae7962c50ec6735..8b69f806d092f6cee9e1ce7a6a528fbe01694f8e 100644
--- a/mace/core/workspace.h
+++ b/mace/core/workspace.h
@@ -8,14 +8,17 @@
 #include "mace/core/common.h"
 #include "mace/core/tensor.h"
 #include "mace/core/public/mace.h"
+#include "mace/core/preallocated_pooled_allocator.h"
 
 namespace mace {
 
 class Workspace {
  public:
-  typedef map<string, std::shared_ptr<Tensor>> TensorMap;
+  typedef map<string, std::unique_ptr<Tensor>> TensorMap;
 
-  Workspace() {}
+  Workspace()
+    : preallocated_allocator_(nullptr) {}
+  ~Workspace() {}
 
   vector<string> Tensors() const;
 
@@ -35,15 +38,13 @@ class Workspace {
 
   void LoadModelTensor(const NetDef &net_def, DeviceType type);
 
-  inline std::string MemBlockName(int mem_id) const {
-	  return internal::MakeString("mem_block_", mem_id);
-  };
-
  private:
   void CreateImageOutputTensor(const NetDef &net_def);
 
   TensorMap tensor_map_;
 
+  std::unique_ptr<PreallocatedPooledAllocator> preallocated_allocator_;
+
   DISABLE_COPY_AND_ASSIGN(Workspace);
 };