diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc
index 61e28e9d4a78fb3d8b40275c62366c104f6f2847..fd1f50c3910a6878505bee7c0655f346b644c5ff 100644
--- a/mace/core/allocator.cc
+++ b/mace/core/allocator.cc
@@ -15,4 +15,13 @@ void SetCPUAllocator(CPUAllocator* alloc) {
   g_cpu_allocator.reset(alloc);
 }
 
+Allocator* GetDeviceAllocator(DeviceType type) {
+  // Only the CPU device has an allocator; any other type trips REQUIRE.
+  if (type != DeviceType::CPU) {
+    REQUIRE(false, "device type ", type, " is not supported.");
+    return nullptr;
+  }
+  return cpu_allocator();
+}
+
 } // namespace mace
diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index fa4f188983801d005da46c64c672d4aa3dd9e910..110b012bec4ea91663c17e14651519abc1a0f9f4 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -21,6 +21,7 @@ class Allocator {
   virtual ~Allocator() noexcept {}
   virtual void* New(size_t nbytes) = 0;
   virtual void Delete(void* data) = 0;
+  virtual void CopyBytes(void* dst, const void* src, size_t size) = 0;
 
   template <typename T>
   T* New(size_t num_elements) {
@@ -59,6 +60,10 @@ class CPUAllocator: public Allocator {
     free(data);
   }
 #endif
+
+  void CopyBytes(void* dst, const void* src, size_t size) {
+    memcpy(dst, src, size);  // memcpy semantics: dst and src must not overlap.
+  }
 };
 
 // Get the CPU Alloctor.
@@ -72,9 +77,10 @@ struct DeviceContext {};
 
 template <>
 struct DeviceContext<DeviceType::CPU> {
-  static Allocator* alloctor() { return cpu_allocator(); }
+  static Allocator* allocator() { return cpu_allocator(); }
 };
 
+Allocator* GetDeviceAllocator(DeviceType type);
 
 } // namespace mace
 
diff --git a/mace/core/integral_types.h b/mace/core/integral_types.h
index 10a330539b5ab54a9cda03b947192beb4efcb0f3..ac4c8803193458a1ab4ee027fe472fbc0c06de3c 100644
--- a/mace/core/integral_types.h
+++ b/mace/core/integral_types.h
@@ -9,11 +9,11 @@
 typedef signed char int8;
 typedef short int16;
 typedef int int32;
-typedef long long int64;
+typedef int64_t int64;
 
 typedef unsigned char uint8;
 typedef unsigned short uint16;
 typedef unsigned int uint32;
-typedef unsigned long long uint64;
+typedef uint64_t uint64;
 
 #endif // MACE_CORE_INTEGRAL_TYPES_H_
diff --git a/mace/core/operator.h b/mace/core/operator.h
index 4b7555262c115b6a44215bb9034f1b80718d78f9..27e1fa16a772481406b0ce665bb61c1f620818b8 100644
--- a/mace/core/operator.h
+++ b/mace/core/operator.h
@@ -101,7 +101,7 @@ class Operator : public OperatorBase {
 
     for (const string &output_str : operator_def.output()) {
       outputs_.push_back(CHECK_NOTNULL(ws->CreateTensor(output_str,
-                         DeviceContext<D>::alloctor(),
+                         DeviceContext<D>::allocator(),
                          DataTypeToEnum<T>::v())));
     }
   }
diff --git a/mace/core/serializer.cc b/mace/core/serializer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..310e76299db02eb8dae9fa2032f65a5cccd1c6e2
--- /dev/null
+++ b/mace/core/serializer.cc
@@ -0,0 +1,78 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/core/serializer.h"
+
+
+namespace mace {
+
+unique_ptr<TensorProto> Serializer::Serialize(const Tensor &tensor,
+                           const string &name) {
+  MACE_NOT_IMPLEMENTED;  // Stub: tensor-to-proto conversion is not written yet.
+  return nullptr;  // No TensorProto is ever produced by this stub.
+}
+
+// Builds a Tensor on the allocator for `type` from `proto`: shape from dims,
+// payload from the repeated field that matches proto.data_type().
+unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
+                                           DeviceType type) {
+  unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type),
+                                       proto.data_type()));
+  // Resize before filling: Copy()/CopyWithCast() REQUIRE matching sizes.
+  vector<TIndex> dims(proto.dims().begin(), proto.dims().end());
+  tensor->Resize(dims);
+
+  switch (proto.data_type()) {
+    case DT_FLOAT:
+      tensor->Copy<float>(proto.float_data().data(),
+                          proto.float_data().size());
+      break;
+    case DT_DOUBLE:
+      tensor->Copy<double>(proto.double_data().data(),
+                           proto.double_data().size());
+      break;
+    case DT_INT32:
+      tensor->Copy<int32>(proto.int32_data().data(),
+                          proto.int32_data().size());
+      break;
+    case DT_UINT8:  // Types narrower than int32 are read from int32_data.
+      tensor->CopyWithCast<int32, uint8>(proto.int32_data().data(),
+                                         proto.int32_data().size());
+      break;
+    case DT_INT16:
+      tensor->CopyWithCast<int32, int16>(proto.int32_data().data(),
+                                         proto.int32_data().size());
+      break;
+    case DT_INT8:
+      tensor->CopyWithCast<int32, int8>(proto.int32_data().data(),
+                                        proto.int32_data().size());
+      break;
+    case DT_INT64:
+      tensor->Copy<int64>(proto.int64_data().data(),
+                          proto.int64_data().size());
+      break;
+    case DT_UINT16:
+      tensor->CopyWithCast<int32, uint16>(proto.int32_data().data(),
+                                          proto.int32_data().size());
+      break;
+    case DT_BOOL:
+      tensor->CopyWithCast<int32, bool>(proto.int32_data().data(),
+                                        proto.int32_data().size());
+      break;
+    case DT_STRING: {  // NOTE(review): no element-count check, unlike Copy().
+      string *content = tensor->mutable_data<string>();
+      for (int i = 0; i < proto.string_data().size(); ++i) {
+        content[i] = proto.string_data(i);
+      }
+      break;
+    }
+    default:
+      MACE_NOT_IMPLEMENTED;
+      break;
+  }
+
+  return tensor;
+}
+
+} // namespace mace
\ No newline at end of file
diff --git a/mace/core/serializer.h b/mace/core/serializer.h
new file mode 100644
index 0000000000000000000000000000000000000000..01f207480f11e8e5b2631ffafc310eb33b1352e8
--- /dev/null
+++ b/mace/core/serializer.h
@@ -0,0 +1,28 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_SERIALIZER_H_
+#define MACE_CORE_SERIALIZER_H_
+
+#include "mace/proto/mace.pb.h"
+#include "mace/core/common.h"
+#include "mace/core/tensor.h"
+
+namespace mace {
+
+// Converts between TensorProto messages and in-memory Tensor objects.
+class Serializer {
+ public:
+  Serializer() {}
+  ~Serializer() {}
+  unique_ptr<TensorProto> Serialize(const Tensor& tensor, const string& name);
+  unique_ptr<Tensor> Deserialize(const TensorProto& proto, DeviceType type);
+
+ private:
+ DISABLE_COPY_AND_ASSIGN(Serializer);
+};
+
+} // namespace mace
+
+#endif // MACE_CORE_SERIALIZER_H_
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 1e15b425c9475491bd5af29fc08110b4adefac53..fb34d5814018bdfecd414d5f289104caa8b9eead 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -9,6 +9,7 @@
 #include "mace/proto/mace.pb.h"
 #include "mace/core/allocator.h"
 #include "mace/core/types.h"
+#include "mace/core/logging.h"
 
 namespace mace {
 
@@ -118,6 +119,41 @@ class Tensor {
     Resize(other->shape());
   }
 
+  template <typename T>
+  inline void Copy(const T* src, size_t size) {  // size = element count
+    REQUIRE(size == size_, "copy src and dst with different size.");
+    CopyBytes(src, size * sizeof(T));  // const T* converts to const void*
+  }
+
+  // Copies `size` elements from src, converting each SrcType to DstType.
+  template <typename SrcType, typename DstType>
+  inline void CopyWithCast(const SrcType* src, size_t size) {
+    REQUIRE(size == size_, "copy src and dst with different size.");
+    // Stage the converted values so the allocator sees one contiguous copy.
+    unique_ptr<DstType[]> buffer(new DstType[size]);
+    for (size_t i = 0; i < size; ++i) buffer[i] = static_cast<DstType>(src[i]);
+    CopyBytes(static_cast<const void*>(buffer.get()), sizeof(DstType) * size);
+  }
+
+  inline void CopyBytes(const void* src, size_t size) {  // size is in bytes
+    alloc_->CopyBytes(raw_mutable_data(), src, size);  // per-allocator copy
+  }
+
+  // Logs the shape and dtype, then every element value, at INFO level.
+  inline void DebugPrint() {
+    std::stringstream os;
+    for (const auto dim : shape_) {  // auto: do not narrow a dim to int
+      os << dim << ", ";
+    }
+    LOG(INFO) << "Tensor shape: " << os.str() << " type: " << DataType_Name(dtype_);
+    os.str("");
+    os.clear();
+    for (int64 i = 0; i < size_; ++i) {  // 64-bit index, as NumElements() is
+      CASES(dtype_, (os << this->data<T>()[i]) << ", ");
+    }
+    LOG(INFO) << os.str();
+  }
+
  private:
   inline int64 NumElements() const {
     return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64>());
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 14431bc66229c72f9600c182c9081a7b7af7dbe3..ae28d2df16f94f47e02d32d1b8366d5ed988aa9b 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -4,6 +4,7 @@
 
 #include "mace/core/common.h"
 #include "mace/core/workspace.h"
+#include "mace/core/serializer.h"
 
 namespace mace {
 
@@ -48,6 +49,11 @@ Tensor* Workspace::GetTensor(const string& name) {
   return const_cast<Tensor*>(static_cast<const Workspace*>(this)->GetTensor(name));
 }
 
-bool RunNet();
+void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
+  Serializer codec;  // No members, so one instance serves every tensor.
+  for (const auto& proto : net_def.tensors()) {
+    tensor_map_[proto.name()] = codec.Deserialize(proto, type);
+  }
+}
 
 } // namespace mace
\ No newline at end of file
diff --git a/mace/core/workspace.h b/mace/core/workspace.h
index 93043744fc275b19430f0457ea918646c2dbf9fc..7de345bcfa0deea1cdbf6228052e35316e88442c 100644
--- a/mace/core/workspace.h
+++ b/mace/core/workspace.h
@@ -32,10 +32,12 @@ class Workspace {
 
   Tensor* GetTensor(const string& name);
 
+  void LoadModelTensor(const NetDef& net_def, DeviceType type);
+
  private:
   TensorMap tensor_map_;
 
-  DISABLE_COPY_AND_ASSIGN(Workspace);
+ DISABLE_COPY_AND_ASSIGN(Workspace);
 };
 
 } // namespace mace
diff --git a/mace/examples/BUILD b/mace/examples/BUILD
index 41362a0314018d6e3e94eedd2236e61b10c3167c..a674593b8860dffb61b392ec9b8787b39614a0ee 100644
--- a/mace/examples/BUILD
+++ b/mace/examples/BUILD
@@ -7,7 +7,7 @@ cc_binary(
         "helloworld.cc",
         ],
     deps = [
-        "//mace/core:core",
+        "//mace/ops:ops",
         ],
     copts = ['-std=c++11'],
     linkopts = if_android(["-pie", "-llog"]),
diff --git a/mace/examples/helloworld.cc b/mace/examples/helloworld.cc
index 0ba6d38eb91ecc46a08fc38e9c0eba6e5e00f8fe..2e9eb1e20a8a100f9c8467958e43aa0f1597389c 100644
--- a/mace/examples/helloworld.cc
+++ b/mace/examples/helloworld.cc
@@ -1,7 +1,67 @@
-#include "mace/core/logging.h"
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/core/net.h"
+
+using namespace mace;
 
 int main() {
-  LOG(INFO) << "Hello World";
+  // Construct graph: a NetDef with three Relu ops, each built in place.
+  NetDef net_def;
+  net_def.set_name("NetTest");
+
+  // ReluTest0: Input -> Output0
+  auto relu_0 = net_def.add_op();
+  relu_0->add_input("Input");
+  relu_0->add_output("Output0");
+  relu_0->set_name("ReluTest0");
+  relu_0->set_type("Relu");
+  auto arg_0 = relu_0->add_arg();
+  arg_0->set_name("arg0");
+  arg_0->set_f(0.5);
+
+  // ReluTest1: Input -> Output1
+  auto relu_1 = net_def.add_op();
+  relu_1->add_input("Input");
+  relu_1->add_output("Output1");
+  relu_1->set_name("ReluTest1");
+  relu_1->set_type("Relu");
+  auto arg_1 = relu_1->add_arg();
+  arg_1->set_name("arg0");
+  arg_1->set_f(1.5);
+
+  // ReluTest2: Output1 -> Output2
+  auto relu_2 = net_def.add_op();
+  relu_2->add_input("Output1");
+  relu_2->add_output("Output2");
+  relu_2->set_name("ReluTest2");
+  relu_2->set_type("Relu");
+  auto arg_2 = relu_2->add_arg();
+  arg_2->set_name("arg0");
+  arg_2->set_f(2.5);
+
+  auto input = net_def.add_tensors();
+  input->set_name("Input");
+  input->set_data_type(DataType::DT_FLOAT);
+  input->add_dims(2);
+  input->add_dims(3);
+  for (int value = -3; value < 3; ++value) {
+    input->add_float_data(value);
+  }
+
+  VLOG(0) << net_def.DebugString();
+
+  // Create workspace and input tensor
+  Workspace ws;
+  ws.LoadModelTensor(net_def, DeviceType::CPU);
+
+  // Create Net & run
+  auto net = CreateNet(net_def, &ws, DeviceType::CPU);
+  net->Run();
+
+  auto out_tensor = ws.GetTensor("Output2");
+  out_tensor->DebugPrint();
 
   return 0;
 }