diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc
index cac3aca2d18216b2f09efd3c6a0d5f8f52bb2baf..7317b8bef6447eb64b6c3ddb55ad0e4509abfc29 100644
--- a/mace/benchmark/benchmark_model.cc
+++ b/mace/benchmark/benchmark_model.cc
@@ -280,12 +280,12 @@ int Main(int argc, char **argv) {
       LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
     }
     create_engine_status =
-        CreateMaceEngineFromPB(model_data_file_ptr,
-                               input_names,
-                               output_names,
-                               device_type,
-                               &engine,
-                               model_pb_data);
+        CreateMaceEngineFromProto(model_pb_data,
+                                  model_data_file_ptr,
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
   } else {
     create_engine_status =
         CreateMaceEngineFromCode(FLAGS_model_name,
diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index e9e7eb87888f03b980135badcf005408e13c5f77..2a49c325912a094a76b89d3e3172f705c720629e 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -311,12 +311,13 @@ void UnloadModelData(const unsigned char *model_data,
   MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno);
 }
 
-MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file,
-                                  const std::vector<std::string> &input_nodes,
-                                  const std::vector<std::string> &output_nodes,
-                                  const DeviceType device_type,
-                                  std::shared_ptr<MaceEngine> *engine,
-                                  const std::vector<unsigned char> &model_pb) {
+MaceStatus CreateMaceEngineFromProto(
+    const std::vector<unsigned char> &model_pb,
+    const std::string &model_data_file,
+    const std::vector<std::string> &input_nodes,
+    const std::vector<std::string> &output_nodes,
+    const DeviceType device_type,
+    std::shared_ptr<MaceEngine> *engine) {
   LOG(INFO) << "Create MaceEngine from model pb";
   // load model
   if (engine == nullptr) {
diff --git a/mace/examples/example.cc b/mace/examples/example.cc
index 3d0b19e523a0c7f78c34b15e276824d73d42afa0..c54f3c504af23413488a648a0f08534bdf3f32d4 100644
--- a/mace/examples/example.cc
+++ b/mace/examples/example.cc
@@ -175,12 +175,12 @@ bool RunModel(const std::vector<std::string> &input_names,
       LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
     }
     create_engine_status =
-        CreateMaceEngineFromPB(FLAGS_model_data_file,
-                               input_names,
-                               output_names,
-                               device_type,
-                               &engine,
-                               model_pb_data);
+        CreateMaceEngineFromProto(model_pb_data,
+                                  FLAGS_model_data_file,
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
   } else {
     create_engine_status =
         CreateMaceEngineFromCode(model_name,
diff --git a/mace/public/mace.h b/mace/public/mace.h
index edc40153b4f73451787058a8378d2cc46ad11559..2ebf72a0cf9ffe1147d710600808ed5458e22bd2 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -114,12 +114,13 @@ class MaceEngine {
   MaceEngine &operator=(const MaceEngine &) = delete;
 };
 
-MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file,
-                                  const std::vector<std::string> &input_nodes,
-                                  const std::vector<std::string> &output_nodes,
-                                  const DeviceType device_type,
-                                  std::shared_ptr<MaceEngine> *engine,
-                                  const std::vector<unsigned char> &model_pb);
+MaceStatus CreateMaceEngineFromProto(
+    const std::vector<unsigned char> &model_pb,
+    const std::string &model_data_file,
+    const std::vector<std::string> &input_nodes,
+    const std::vector<std::string> &output_nodes,
+    const DeviceType device_type,
+    std::shared_ptr<MaceEngine> *engine);
 
 }  // namespace mace
 
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index e903d817033b05287d82d7fec4ec2d8868c1dfbb..b466ac729d9944cc6e19d88147dea8a36257e0b1 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -3,6 +3,7 @@ py_library(
     srcs = [
         "convert_util.py",
         "graph_util.py",
+        "tensor_util.py",
         "tf_dsp_converter_lib.py",
         "converter_tool/base_converter.py",
         "converter_tool/shape_inference.py",
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index 8468847d54292d156808904b3aabf579946a9746..0de6190717a2a3628efff28b73825a2638b8da8e 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -22,6 +22,7 @@ from mace.proto import mace_pb2
 from mace.python.tools import tf_dsp_converter_lib
 from mace.python.tools import memory_optimizer
 from mace.python.tools import source_converter_lib
+from mace.python.tools import tensor_util
 from mace.python.tools.converter_tool import base_converter as cvt
 from mace.python.tools.converter_tool import tensorflow_converter
 from mace.python.tools.converter_tool import caffe_converter
@@ -173,11 +174,27 @@ def main(unused_args):
 
         print "Memory optimization done."
 
+    if FLAGS.obfuscate:
+        tensor_util.obfuscate_name(output_graph_def)
+    else:
+        tensor_util.rename_tensor(output_graph_def)
+
+    tensor_infos, model_data = tensor_util.get_tensor_info_and_model_data(
+        output_graph_def, FLAGS.runtime)
+
     source_converter_lib.convert_to_source(
-        output_graph_def, model_checksum, weight_checksum, FLAGS.template,
-        FLAGS.obfuscate, FLAGS.model_tag, FLAGS.codegen_output, FLAGS.runtime,
-        FLAGS.embed_model_data, FLAGS.winograd, FLAGS.model_load_type)
+        output_graph_def, model_checksum, weight_checksum, FLAGS.template,
+        FLAGS.obfuscate, FLAGS.model_tag, FLAGS.codegen_output,
+        FLAGS.runtime, FLAGS.embed_model_data, FLAGS.winograd,
+        FLAGS.model_load_type, tensor_infos, model_data)
+
+    if not FLAGS.embed_model_data:
+        output_dir = os.path.dirname(FLAGS.codegen_output) + '/'
+        with open(output_dir + FLAGS.model_tag + '.data', "wb") as f:
+            f.write(bytearray(model_data))
+
     if FLAGS.model_load_type == 'pb':
+        tensor_util.del_tensor_data(output_graph_def, FLAGS.runtime)
         with open(FLAGS.pb_output, "wb") as f:
             f.write(output_graph_def.SerializeToString())
         # with open(FLAGS.pb_output + '_txt', "wb") as f:
diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2
index ea57053e57e3e04f32eaba1e3dd69671066e8a30..ba2ca7462a1d33d5c6b4dc675454680d36d32fe9 100644
--- a/mace/python/tools/model.jinja2
+++ b/mace/python/tools/model.jinja2
@@ -27,8 +27,8 @@ namespace mace {
 {% if model_type == 'source' %}
 namespace {{tag}} {
 
-{% for tensor in tensors %}
-extern void CreateTensor{{ tensor.id }}(mace::ConstTensor *tensor);
+{% for i in range(net.tensors|length) %}
+extern void CreateTensor{{ i }}(mace::ConstTensor *tensor);
 {% endfor %}
 
 
@@ -111,9 +111,9 @@ void CreateOperators(NetDef *net_def) {
 
 void CreateTensors(NetDef *net_def) {
   MACE_LATENCY_LOGGER(1, "Create tensors");
-  net_def->mutable_tensors()->Reserve({{ tensors|length }});
-  {% for tensor in tensors %}
-  mace::{{tag}}::CreateTensor{{tensor.id}}(net_def->add_tensors());
+  net_def->mutable_tensors()->Reserve({{ net.tensors|length }});
+  {% for i in range(net.tensors|length) %}
+  mace::{{tag}}::CreateTensor{{ i }}(net_def->add_tensors());
 {% endfor %}
 }
 
diff --git a/mace/python/tools/source_converter_lib.py b/mace/python/tools/source_converter_lib.py
index 81ebef0de513724e6fdb39df470e880b32f62539..93dcba0e41352459416448517f56154079b31ea7 100644
--- a/mace/python/tools/source_converter_lib.py
+++ b/mace/python/tools/source_converter_lib.py
@@ -14,122 +14,10 @@
 
 import datetime
 import os
-import uuid
-import numpy as np
-import hashlib
 
 from mace.proto import mace_pb2
 from jinja2 import Environment, FileSystemLoader
 
-GENERATED_NAME = set()
-
-
-def generate_obfuscated_name(namespace, name):
-    md5 = hashlib.md5()
-    md5.update(namespace)
-    md5.update(name)
-    md5_digest = md5.hexdigest()
-
-    name = md5_digest[:8]
-    while name in GENERATED_NAME:
-        name = md5_digest
-    assert name not in GENERATED_NAME
-    GENERATED_NAME.add(name)
-    return name
-
-
-def generate_tensor_map(tensors):
-    tensor_map = {}
-    for t in tensors:
-        if t.name not in tensor_map:
-            tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
-    return tensor_map
-
-
-def generate_in_out_map(ops, tensor_map):
-    in_out_map = {}
-    for op in ops:
-        op.name = generate_obfuscated_name("op", op.name)
-        for input_name in op.input:
-            if input_name not in in_out_map:
-                if input_name in tensor_map:
-                    in_out_map[input_name] = tensor_map[input_name]
-                else:
-                    in_out_map[input_name] = generate_obfuscated_name(
-                        "in", input_name)
-        for output_name in op.output:
-            if output_name not in in_out_map:
-                if output_name in tensor_map:
-                    in_out_map[output_name] = tensor_map[output_name]
-                else:
-                    in_out_map[output_name] = generate_obfuscated_name(
-                        "out", output_name)
-    return in_out_map
-
-
-def obfuscate_name(net_def):
-    input_node = "mace_input_node"
-    output_node = "mace_output_node"
-    tensor_map = generate_tensor_map(net_def.tensors)
-    in_out_map = generate_in_out_map(net_def.op, tensor_map)
-    for t in net_def.tensors:
-        if input_node not in t.name and output_node not in t.name:
-            t.name = tensor_map[t.name]
-    for op in net_def.op:
-        for i in range(len(op.input)):
-            if input_node not in op.input[i]:
-                op.input[i] = in_out_map[op.input[i]]
-        for i in range(len(op.output)):
-            if output_node not in op.output[i]:
-                op.output[i] = in_out_map[op.output[i]]
-
-
-def normalize_op_name(op_name):
-    idx = op_name.rfind(':')
-    if idx == -1:
-        return op_name
-    else:
-        return op_name[:idx]
-
-
-def rename_tensor(net_def):
-    tensor_map = {}
-    for t in net_def.tensors:
-        if t.name not in tensor_map:
-            tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/",
-                                                                         "_")
-        t.name = tensor_map[t.name]
-    for op in net_def.op:
-        for i in range(len(op.input)):
-            if op.input[i] in tensor_map:
-                op.input[i] = tensor_map[op.input[i]]
-        for i in range(len(op.output)):
-            if op.output[i] in tensor_map:
-                op.output[i] = tensor_map[op.output[i]]
-
-
-class TensorInfo:
-    def __init__(self, id, t, runtime):
-        self.id = id
-        self.data_type = mace_pb2.DataType.Name(t.data_type)
-        if t.data_type == mace_pb2.DT_FLOAT:
-            if runtime == 'gpu':
-                self.data_type = mace_pb2.DT_HALF
-                self.data = bytearray(
-                    np.array(t.float_data).astype(np.float16).tobytes())
-            else:
-                self.data_type = mace_pb2.DT_FLOAT
-                self.data = bytearray(
-                    np.array(t.float_data).astype(np.float32).tobytes())
-        elif t.data_type == mace_pb2.DT_INT32:
-            self.data = bytearray(
-                np.array(t.int32_data).astype(np.int32).tobytes())
-        elif t.data_type == mace_pb2.DT_UINT8:
-            self.data = bytearray(
-                np.array(t.int32_data).astype(np.uint8).tolist())
-        else:
-            raise Exception('Tensor data type %s not supported' % t.data_type)
-
 
 def stringfy(value):
     return ', '.join('"{0}"'.format(w) for w in value)
@@ -137,11 +25,8 @@ def stringfy(value):
 
 def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
                       obfuscate, model_tag, output, runtime, embed_model_data,
-                      winograd_conv, model_load_type):
-    if obfuscate:
-        obfuscate_name(net_def)
-    else:
-        rename_tensor(net_def)
+                      winograd_conv, model_load_type, tensor_infos,
+                      model_data):
 
     # Capture our current directory
     print template_dir
@@ -153,40 +38,15 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
     output_dir = os.path.dirname(output) + '/'
     # generate tensor source files
     template_name = 'tensor_source.jinja2'
-    model_data = []
-    offset = 0
-    counter = 0
-    for t in net_def.tensors:
-        tensor_info = TensorInfo(counter, t, runtime)
-        # align
-        if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0:
-            padding = 4 - offset % 4
-            model_data.extend(bytearray([0] * padding))
-            offset += padding
-
-        if t.data_type == mace_pb2.DT_FLOAT:
-            t.data_size = len(t.float_data)
-        elif t.data_type == mace_pb2.DT_INT32:
-            t.data_size = len(t.int32_data)
-        elif t.data_type == mace_pb2.DT_UINT8:
-            t.data_size = len(t.int32_data)
-        t.offset = offset
-
+    for i in range(len(net_def.tensors)):
         if model_load_type == 'source':
             source = j2_env.get_template(template_name).render(
-                tensor_info=tensor_info,
-                tensor=t,
+                tensor_info=tensor_infos[i],
+                tensor=net_def.tensors[i],
                 tag=model_tag,
             )
-            with open(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f:
+            with open(output_dir + 'tensor' + str(i) + '.cc', "wb") as f:
                 f.write(source)
-        counter += 1
-        model_data.extend(tensor_info.data)
-        offset += len(tensor_info.data)
-
-    if not embed_model_data:
-        with open(output_dir + model_tag + '.data', "wb") as f:
-            f.write(bytearray(model_data))
 
     if model_load_type == 'source':
         # generate tensor data
@@ -194,7 +54,7 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
         source = j2_env.get_template(template_name).render(
             tag=model_tag,
             embed_model_data=embed_model_data,
-            model_data_size=offset,
+            model_data_size=len(model_data),
             model_data=model_data)
         with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
             f.write(source)
@@ -218,15 +78,10 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
     # generate model source files
     build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     template_name = 'model.jinja2'
-    tensors = [
-        TensorInfo(i, net_def.tensors[i], runtime)
-        for i in range(len(net_def.tensors))
-    ]
     checksum = model_checksum
     if weight_checksum is not None:
         checksum = "{},{}".format(model_checksum, weight_checksum)
     source = j2_env.get_template(template_name).render(
-        tensors=tensors,
         net=net_def,
         tag=model_tag,
         runtime=runtime,
@@ -244,15 +99,3 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
     source = j2_env.get_template(template_name).render(tag=model_tag, )
     with open(output_dir + model_tag + '.h', "wb") as f:
         f.write(source)
-
-    for t in net_def.tensors:
-        if t.data_type == mace_pb2.DT_FLOAT:
-            del t.float_data[:]
-            if runtime == 'gpu':
-                t.data_type = mace_pb2.DT_HALF
-            else:
-                t.data_type = mace_pb2.DT_FLOAT
-        elif t.data_type == mace_pb2.DT_INT32:
-            del t.int32_data[:]
-        elif t.data_type == mace_pb2.DT_UINT8:
-            del t.int32_data[:]
diff --git a/mace/python/tools/tensor_util.py b/mace/python/tools/tensor_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..e94a10ca92042e745900c8d2f7c8cd7faf19d1a6
--- /dev/null
+++ b/mace/python/tools/tensor_util.py
@@ -0,0 +1,170 @@
+# Copyright 2018 Xiaomi, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import hashlib
+import numpy as np
+
+from mace.proto import mace_pb2
+
+GENERATED_NAME = set()
+
+
+def generate_obfuscated_name(namespace, name):
+    md5 = hashlib.md5()
+    md5.update(namespace)
+    md5.update(name)
+    md5_digest = md5.hexdigest()
+
+    name = md5_digest[:8]
+    while name in GENERATED_NAME:
+        name = md5_digest
+    assert name not in GENERATED_NAME
+    GENERATED_NAME.add(name)
+    return name
+
+
+def generate_tensor_map(tensors):
+    tensor_map = {}
+    for t in tensors:
+        if t.name not in tensor_map:
+            tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
+    return tensor_map
+
+
+def generate_in_out_map(ops, tensor_map):
+    in_out_map = {}
+    for op in ops:
+        op.name = generate_obfuscated_name("op", op.name)
+        for input_name in op.input:
+            if input_name not in in_out_map:
+                if input_name in tensor_map:
+                    in_out_map[input_name] = tensor_map[input_name]
+                else:
+                    in_out_map[input_name] = generate_obfuscated_name(
+                        "in", input_name)
+        for output_name in op.output:
+            if output_name not in in_out_map:
+                if output_name in tensor_map:
+                    in_out_map[output_name] = tensor_map[output_name]
+                else:
+                    in_out_map[output_name] = generate_obfuscated_name(
+                        "out", output_name)
+    return in_out_map
+
+
+def obfuscate_name(net_def):
+    input_node = "mace_input_node"
+    output_node = "mace_output_node"
+    tensor_map = generate_tensor_map(net_def.tensors)
+    in_out_map = generate_in_out_map(net_def.op, tensor_map)
+    for t in net_def.tensors:
+        if input_node not in t.name and output_node not in t.name:
+            t.name = tensor_map[t.name]
+    for op in net_def.op:
+        for i in range(len(op.input)):
+            if input_node not in op.input[i]:
+                op.input[i] = in_out_map[op.input[i]]
+        for i in range(len(op.output)):
+            if output_node not in op.output[i]:
+                op.output[i] = in_out_map[op.output[i]]
+
+
+def normalize_op_name(op_name):
+    idx = op_name.rfind(':')
+    if idx == -1:
+        return op_name
+    else:
+        return op_name[:idx]
+
+
+def rename_tensor(net_def):
+    tensor_map = {}
+    for t in net_def.tensors:
+        if t.name not in tensor_map:
+            tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/",
+                                                                         "_")
+        t.name = tensor_map[t.name]
+    for op in net_def.op:
+        for i in range(len(op.input)):
+            if op.input[i] in tensor_map:
+                op.input[i] = tensor_map[op.input[i]]
+        for i in range(len(op.output)):
+            if op.output[i] in tensor_map:
+                op.output[i] = tensor_map[op.output[i]]
+
+
+class TensorInfo:
+    def __init__(self, id, t, runtime):
+        self.id = id
+        self.data_type = mace_pb2.DataType.Name(t.data_type)
+        if t.data_type == mace_pb2.DT_FLOAT:
+            if runtime == 'gpu':
+                self.data_type = mace_pb2.DT_HALF
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float16).tobytes())
+            else:
+                self.data_type = mace_pb2.DT_FLOAT
+                self.data = bytearray(
+                    np.array(t.float_data).astype(np.float32).tobytes())
+        elif t.data_type == mace_pb2.DT_INT32:
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.int32).tobytes())
+        elif t.data_type == mace_pb2.DT_UINT8:
+            self.data = bytearray(
+                np.array(t.int32_data).astype(np.uint8).tolist())
+        else:
+            raise Exception('Tensor data type %s not supported' % t.data_type)
+
+
+def get_tensor_info_and_model_data(net_def, runtime):
+    model_data = []
+    offset = 0
+    counter = 0
+    tensor_infos = []
+    for t in net_def.tensors:
+        tensor_info = TensorInfo(counter, t, runtime)
+        tensor_infos.append(tensor_info)
+        # align
+        if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0:
+            padding = 4 - offset % 4
+            model_data.extend(bytearray([0] * padding))
+            offset += padding
+
+        if t.data_type == mace_pb2.DT_FLOAT:
+            t.data_size = len(t.float_data)
+        elif t.data_type == mace_pb2.DT_INT32:
+            t.data_size = len(t.int32_data)
+        elif t.data_type == mace_pb2.DT_UINT8:
+            t.data_size = len(t.int32_data)
+        t.offset = offset
+
+        counter += 1
+        model_data.extend(tensor_info.data)
+        offset += len(tensor_info.data)
+
+    return tensor_infos, model_data
+
+
+def del_tensor_data(net_def, runtime):
+    for t in net_def.tensors:
+        if t.data_type == mace_pb2.DT_FLOAT:
+            del t.float_data[:]
+            if runtime == 'gpu':
+                t.data_type = mace_pb2.DT_HALF
+            else:
+                t.data_type = mace_pb2.DT_FLOAT
+        elif t.data_type == mace_pb2.DT_INT32:
+            del t.int32_data[:]
+        elif t.data_type == mace_pb2.DT_UINT8:
+            del t.int32_data[:]
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index af6e0c43a96b0d601f96a285e4fec24f62ba213e..38f18af5a5dd425ee0888ea6450b7494c16ca594 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -237,12 +237,12 @@ bool RunModel(const std::string &model_name,
       LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
     }
     create_engine_status =
-        CreateMaceEngineFromPB(FLAGS_model_data_file,
-                               input_names,
-                               output_names,
-                               device_type,
-                               &engine,
-                               model_pb_data);
+        CreateMaceEngineFromProto(model_pb_data,
+                                  FLAGS_model_data_file,
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
   } else {
     create_engine_status =
         CreateMaceEngineFromCode(model_name,
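
Note: external callers migrate by moving the serialized model to the first argument and the engine output pointer to the last, mirroring the call-site updates above. A minimal caller sketch against the new signature follows; the paths, node names, and file-reading code are illustrative assumptions, not part of this patch:

    // sketch.cc -- hypothetical standalone caller of the reordered API.
    #include <fstream>
    #include <iterator>
    #include <memory>
    #include <string>
    #include <vector>

    #include "mace/public/mace.h"

    int main() {
      // Read the serialized NetDef; "model.pb" is a placeholder path.
      std::ifstream in("model.pb", std::ios::binary);
      std::vector<unsigned char> model_pb(
          (std::istreambuf_iterator<char>(in)),
          std::istreambuf_iterator<char>());

      // Placeholder node names; use the names your converted model exposes.
      std::vector<std::string> input_names{"input_node"};
      std::vector<std::string> output_names{"output_node"};

      std::shared_ptr<mace::MaceEngine> engine;
      mace::MaceStatus status = mace::CreateMaceEngineFromProto(
          model_pb,      // serialized model, now the first parameter
          "model.data",  // weight file written when embed_model_data is off
          input_names,
          output_names,
          mace::DeviceType::CPU,
          &engine);      // engine output parameter, now last
      return status == mace::MACE_SUCCESS ? 0 : 1;
    }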