diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index 83729b79121c90232cc336b9437ed95ed7d3b6e0..3e0d642f8affcdf541d016519d063991636156cc 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -10,7 +10,7 @@ namespace mace {
 
 ConstTensor::ConstTensor(const std::string &name,
-                         unsigned char *data,
+                         const unsigned char *data,
                          const std::vector &dims,
                          const DataType data_type,
                          uint32_t node_id) :
@@ -23,7 +23,7 @@ ConstTensor::ConstTensor(const std::string &name,
     node_id_(node_id) {}
 
 ConstTensor::ConstTensor(const std::string &name,
-                         unsigned char *data,
+                         const unsigned char *data,
                          const std::vector &dims,
                          const int data_type,
                          uint32_t node_id) :
diff --git a/mace/core/net.cc b/mace/core/net.cc
index 1dcef048cf8b0ec53863da471b23bcf7df74720f..46febc68122d4579cc3d44fada8df2a12f968a58 100644
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -22,7 +22,7 @@ SimpleNet::SimpleNet(const std::shared_ptr op_registry,
                      const NetMode mode)
     : NetBase(op_registry, net_def, ws, type),
       device_type_(type) {
-  VLOG(1) << "Constructing SimpleNet " << net_def->name();
+  MACE_LATENCY_LOGGER(1, "Constructing SimpleNet ", net_def->name());
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
     const auto &operator_def = net_def->op(idx);
     VLOG(3) << "Creating operator " << operator_def.name() << "("
@@ -41,10 +41,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
   MACE_LATENCY_LOGGER(1, "Running net");
   for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) {
     auto &op = *iter;
-    VLOG(3) << "Running operator " << op->debug_def().name() << "("
-            << op->debug_def().type() << ").";
-    MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name());
-
+    MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name(),
+                        "(", op->debug_def().type(), ")");
     bool future_wait = (device_type_ == DeviceType::OPENCL &&
                         (run_metadata != nullptr ||
                          std::distance(iter, operators_.end()) == 1));
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
index 4c35b29e5ce7c94787d15182b335904bdee5e37c..aa55fd0cd044e7e08455287adf454c9e703ab27b 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
+++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
@@ -64,29 +64,29 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
   std::thread const_thread([&]() {
     std::cout << "thread function\n";
     std::vector const_node_list;
-    for (const ConstTensor &tensor_proto: net_def.tensors()) {
-      std::vector tensor_shape(tensor_proto.dims().begin(),
-                               tensor_proto.dims().end());
+    for (const ConstTensor &const_tensor: net_def.tensors()) {
+      std::vector tensor_shape(const_tensor.dims().begin(),
+                               const_tensor.dims().end());
       while (tensor_shape.size() < 4) {
         tensor_shape.insert(tensor_shape.begin(), 1);
       }
 
       hexagon_nn_const_node const_node;
-      const_node.node_id = node_id(tensor_proto.node_id());
+      const_node.node_id = node_id(const_tensor.node_id());
       const_node.tensor.batches = tensor_shape[0];
       const_node.tensor.height = tensor_shape[1];
       const_node.tensor.width = tensor_shape[2];
       const_node.tensor.depth = tensor_shape[3];
 
-      if (tensor_proto.data_type() == DataType::DT_INT32
-          && tensor_proto.data_size() == 0) {
+      if (const_tensor.data_type() == DataType::DT_INT32
+          && const_tensor.data_size() == 0) {
        const_node.tensor.data = NULL;
        const_node.tensor.dataLen = 0;
      } else {
        const_node.tensor.data =
-            const_cast(tensor_proto.data());
+            const_cast(const_tensor.data());
        const_node.tensor.dataLen =
-            tensor_proto.data_size() * GetEnumTypeSize(tensor_proto.data_type());
+            const_tensor.data_size() * GetEnumTypeSize(const_tensor.data_type());
      }
      const_node_list.push_back(const_node);
      // 255 is magic number: why fastrpc limits sequence length to that?
diff --git a/mace/core/serializer.cc b/mace/core/serializer.cc
index 9b7a51bdad5fa41944ea443f37e9d465ca598c5d..c171205f75ad9f73673958a176957f92372a425d 100644
--- a/mace/core/serializer.cc
+++ b/mace/core/serializer.cc
@@ -12,56 +12,64 @@ std::unique_ptr Serializer::Serialize(const Tensor &tensor,
   return nullptr;
 }
 
-std::unique_ptr Serializer::Deserialize(const ConstTensor &proto,
+std::unique_ptr Serializer::Deserialize(const ConstTensor &const_tensor,
                                         DeviceType type) {
   std::unique_ptr tensor(
-      new Tensor(GetDeviceAllocator(type), proto.data_type()));
+      new Tensor(GetDeviceAllocator(type), const_tensor.data_type()));
   std::vector dims;
-  for (const index_t d : proto.dims()) {
+  for (const index_t d : const_tensor.dims()) {
     dims.push_back(d);
   }
   tensor->Resize(dims);
 
-  switch (proto.data_type()) {
+  switch (const_tensor.data_type()) {
     case DT_HALF:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(reinterpret_cast(const_tensor.data()),
+                   const_tensor.data_size());
      break;
    case DT_FLOAT:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(reinterpret_cast(const_tensor.data()),
+                   const_tensor.data_size());
      break;
    case DT_DOUBLE:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_INT32:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_INT64:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_UINT8:
-      tensor->Copy(reinterpret_cast(proto.data()),
-                   proto.data_size());
+      tensor->Copy(
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_INT16:
      tensor->CopyWithCast(
-          reinterpret_cast(proto.data()), proto.data_size());
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_INT8:
      tensor->CopyWithCast(
-          reinterpret_cast(proto.data()), proto.data_size());
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_UINT16:
      tensor->CopyWithCast(
-          reinterpret_cast(proto.data()), proto.data_size());
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    case DT_BOOL:
      tensor->CopyWithCast(
-          reinterpret_cast(proto.data()), proto.data_size());
+          reinterpret_cast(const_tensor.data()),
+          const_tensor.data_size());
      break;
    default:
      MACE_NOT_IMPLEMENTED;
diff --git a/mace/core/serializer.h b/mace/core/serializer.h
index b2e905f93f06c2bd61b060ea6cbadd4b235a2bde..fcc98a7230d15fb106d0e3c7b9e4f23daf37fd0a 100644
--- a/mace/core/serializer.h
+++ b/mace/core/serializer.h
@@ -15,9 +15,11 @@ class Serializer {
   Serializer() {}
   ~Serializer() {}
 
-  std::unique_ptr Serialize(const Tensor &tensor, const std::string &name);
+  std::unique_ptr Serialize(const Tensor &tensor,
+                            const std::string &name);
 
-  std::unique_ptr Deserialize(const ConstTensor &proto, DeviceType type);
+  std::unique_ptr Deserialize(const ConstTensor &const_tensor,
+                              DeviceType type);
 
   DISABLE_COPY_AND_ASSIGN(Serializer);
 };
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 24990283db1cea39156db3137c861a8d323f40f1..0fcda89c6112f27d100fd5027858d70eea5260a2 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -324,12 +324,12 @@ class Tensor {
         }
       }
     }
+
     MappingGuard(MappingGuard &&other) {
       tensor_ = other.tensor_;
       other.tensor_ = nullptr;
     }
-    MappingGuard(const MappingGuard &other) = delete;
-    MappingGuard & operator = (const MappingGuard &other) = delete;
+
     ~MappingGuard() {
       if (tensor_ != nullptr) tensor_->Unmap();
     }
@@ -339,6 +339,8 @@ class Tensor {
    private:
     const Tensor *tensor_;
     std::vector mapped_image_pitch_;
+
+    DISABLE_COPY_AND_ASSIGN(MappingGuard);
   };
 
  private:
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 2cf55a3b0548bb350a320f1a04f8196946ccd50f..371ab2e06d71a3a0e836eae80db96dc8a5d24a36 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -72,15 +72,15 @@ Tensor *Workspace::GetTensor(const std::string &name) {
 void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
   MACE_LATENCY_LOGGER(1, "Load model tensors");
   Serializer serializer;
-  for (auto &tensor_proto : net_def.tensors()) {
-    MACE_LATENCY_LOGGER(2, "Load tensor ", tensor_proto.name());
-    VLOG(3) << "Load tensor: " << tensor_proto.name()
-            << ", with data type: " << tensor_proto.data_type()
-            << ", has shape: "
-            << MakeString(std::vector(tensor_proto.dims().begin(),
-                                      tensor_proto.dims().end()));
-    tensor_map_[tensor_proto.name()] =
-        serializer.Deserialize(tensor_proto, type);
+  for (auto &const_tensor : net_def.tensors()) {
+    MACE_LATENCY_LOGGER(2, "Load tensor ", const_tensor.name());
+    VLOG(3) << "Tensor name: " << const_tensor.name()
+            << ", data type: " << const_tensor.data_type()
+            << ", shape: "
+            << MakeString(std::vector(const_tensor.dims().begin(),
+                                      const_tensor.dims().end()));
+    tensor_map_[const_tensor.name()] =
+        serializer.Deserialize(const_tensor, type);
   }
   if (type == DeviceType::OPENCL) {
     CreateImageOutputTensor(net_def);
diff --git a/mace/public/mace.h b/mace/public/mace.h
index ab91e4c7592e862f2abbc540b5dca0884417a3e1..591987cb9eb0740b790dffb8f2b519adb17e887f 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -73,12 +73,12 @@ enum DataType {
 class ConstTensor {
  public:
   ConstTensor(const std::string &name,
-              unsigned char *data,
+              const unsigned char *data,
               const std::vector &dims,
               const DataType data_type = DT_FLOAT,
               uint32_t node_id = 0);
   ConstTensor(const std::string &name,
-              unsigned char *data,
+              const unsigned char *data,
               const std::vector &dims,
               const int data_type,
               uint32_t node_id = 0);
diff --git a/mace/utils/logging.h b/mace/utils/logging.h
index 6dc5658c40da3f85d210cf56dde530dbfcc2239c..592831580f850a22adac6d50e2ddfb3cf0bdbf0b 100644
--- a/mace/utils/logging.h
+++ b/mace/utils/logging.h
@@ -113,6 +113,7 @@ class LatencyLogger {
       : vlog_level_(vlog_level), message_(message) {
     if (VLOG_IS_ON(vlog_level_)) {
       start_micros_ = NowMicros();
+      VLOG(vlog_level_) << message_ << " started";
     }
   }
   ~LatencyLogger() {
diff --git a/tools/export_lib.sh b/tools/export_lib.sh
index 119dfa1513025b337503d99f85ce82f4b7e023e6..a178c118f85a4843b534559445f5cf1bbde80c80 100755
--- a/tools/export_lib.sh
+++ b/tools/export_lib.sh
@@ -161,8 +161,10 @@ rm -rf ${EXPORT_LIB_DIR}
 mkdir -p ${EXPORT_LIB_DIR}
 
 cp ${MACE_SOURCE_DIR}/mace/public/*.h ${EXPORT_INCLUDE_DIR}/mace/public/ || exit 1
-# utils is noti part of public API
-cp ${MACE_SOURCE_DIR}/mace/utils/*.h ${EXPORT_INCLUDE_DIR}/mace/utils/ || exit 1
+# utils is not part of public API
+cp ${MACE_SOURCE_DIR}/mace/utils/env_time.h ${EXPORT_INCLUDE_DIR}/mace/utils/ || exit 1
+cp ${MACE_SOURCE_DIR}/mace/utils/logging.h ${EXPORT_INCLUDE_DIR}/mace/utils/ || exit 1
+cp ${MACE_SOURCE_DIR}/mace/utils/string_util.h ${EXPORT_INCLUDE_DIR}/mace/utils/ || exit 1
 cp ${LIBMACE_TEMP_DIR}/libmace.a ${LIBMACE_TEMP_DIR}/libmace_dev.a ${LIBMACE_TEMP_DIR}/libmace_prod.a ${EXPORT_LIB_DIR}/ || exit 1
 
 echo "Step 6: Remove temporary file"
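
Note on the logging changes above: MACE_LATENCY_LOGGER constructs a scoped LatencyLogger, so with the new VLOG in the constructor each guarded scope now logs a "started" line on entry and (via the existing destructor, which is not shown in this hunk) its elapsed time on exit. Below is a minimal standalone sketch of the same RAII timing pattern; it uses std::chrono and std::cout instead of MACE's VLOG/NowMicros, and ScopedLatencyLogger is an illustrative name, not a MACE class.

    // Illustrative sketch only -- not the MACE implementation. It mirrors the
    // RAII pattern visible in mace/utils/logging.h: log "<message> started" on
    // construction, then the elapsed time on destruction.
    #include <chrono>
    #include <iostream>
    #include <string>

    class ScopedLatencyLogger {
     public:
      explicit ScopedLatencyLogger(std::string message)
          : message_(std::move(message)),
            start_(std::chrono::steady_clock::now()) {
        std::cout << message_ << " started" << std::endl;
      }
      ~ScopedLatencyLogger() {
        auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(
                           std::chrono::steady_clock::now() - start_)
                           .count();
        std::cout << message_ << " latency: " << elapsed << " us" << std::endl;
      }

     private:
      std::string message_;
      std::chrono::steady_clock::time_point start_;
    };

    int main() {
      // Both the "started" and the latency lines bracket this scope.
      ScopedLatencyLogger logger("Running operator Conv2D(Conv2D)");
      // ... work being timed ...
      return 0;
    }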
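The ConstTensor signature change (unsigned char * to const unsigned char *) lets callers hand in weights that live in read-only storage without a const_cast at the call site. Here is an illustrative stand-in; WeightBlob is hypothetical, not the real mace::ConstTensor, and the int64_t dims element type is an assumption made for this sketch.

    // Sketch of why the const-pointer parameter matters, using a stand-in
    // class rather than the real mace::ConstTensor.
    #include <cstdint>
    #include <string>
    #include <vector>

    class WeightBlob {  // hypothetical stand-in for ConstTensor
     public:
      WeightBlob(const std::string &name, const unsigned char *data,
                 const std::vector<int64_t> &dims)
          : name_(name), data_(data), dims_(dims) {}

     private:
      std::string name_;
      const unsigned char *data_;  // never written through, so keep it const
      std::vector<int64_t> dims_;
    };

    // Model weights are typically embedded as const arrays; a non-const
    // parameter would force a const_cast at every call site.
    static const unsigned char kConvWeights[] = {0x00, 0x3f, 0x80, 0x00};

    int main() {
      WeightBlob blob("conv1/weights", kConvWeights, {1, 1, 1, 4});
      (void)blob;
      return 0;
    }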
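The tensor.h hunk keeps MappingGuard movable while DISABLE_COPY_AND_ASSIGN removes the copy operations, so a guard can be returned from a helper but the mapped buffer can never be unmapped twice. A sketch of that ownership pattern with stand-in names (Resource and ScopedMap are illustrative, not MACE types):

    // Stand-in sketch of a movable, non-copyable RAII guard, the same shape
    // as Tensor::MappingGuard after this change.
    #include <iostream>
    #include <utility>

    struct Resource {
      void Map() { std::cout << "map\n"; }
      void Unmap() { std::cout << "unmap\n"; }
    };

    class ScopedMap {
     public:
      explicit ScopedMap(Resource *res) : res_(res) {
        if (res_ != nullptr) res_->Map();
      }
      // Moving transfers ownership; the moved-from guard becomes inert.
      ScopedMap(ScopedMap &&other) noexcept : res_(other.res_) {
        other.res_ = nullptr;
      }
      ~ScopedMap() {
        if (res_ != nullptr) res_->Unmap();
      }
      // Copying is disabled so the resource is never unmapped twice.
      ScopedMap(const ScopedMap &) = delete;
      ScopedMap &operator=(const ScopedMap &) = delete;

     private:
      Resource *res_;
    };

    ScopedMap MakeGuard(Resource *res) { return ScopedMap(res); }

    int main() {
      Resource res;
      ScopedMap guard = MakeGuard(&res);  // OK: move (or elision)
      // ScopedMap copy = guard;          // would not compile: copy is deleted
      return 0;
    }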