diff --git a/mace/dsp/test/quantized_add_test.cc b/mace/dsp/test/quantized_add_test.cc
index f30d8424f68c1613064f3c7531b9685a41a0f215..6cf51bdb0bfb1704fa82cadcb4bc3237c23f97a1 100644
--- a/mace/dsp/test/quantized_add_test.cc
+++ b/mace/dsp/test/quantized_add_test.cc
@@ -17,7 +17,6 @@ static NetDef BuildNetDef() {
   input_op->set_type("INPUT");
   input_op->set_node_id(0);
   input_op->set_padding(0);
-  input_op->add_out_max_byte_size(1000);
 
   // add op
   OperatorDef *add_op = net.add_op();
@@ -59,10 +58,6 @@ static NetDef BuildNetDef() {
   input_node_input->set_node_id(16);
   input_node_input->set_output_port(0);
 
-  add_op->add_out_max_byte_size(1000);
-  add_op->add_out_max_byte_size(1000);
-  add_op->add_out_max_byte_size(1000);
-
   // output op
   OperatorDef *output_op = net.add_op();
   output_op->set_name("__output__");
diff --git a/mace/dsp/test/quantized_maxpool_test.cc b/mace/dsp/test/quantized_maxpool_test.cc
index 897d1ac432e955ebc3337424e708ad62d28ab2d9..f99b0b956edc70cfea8fd1f562b34dbc0a6c6d30 100644
--- a/mace/dsp/test/quantized_maxpool_test.cc
+++ b/mace/dsp/test/quantized_maxpool_test.cc
@@ -24,7 +24,6 @@ static NetDef BuildNetDef(const vector<index_t> &input_shape,
   input_op->set_type("INPUT");
   input_op->set_node_id(0);
   input_op->set_padding(0);
-  input_op->add_out_max_byte_size(1000);
 
   // maxpool op
   OperatorDef *maxpool_op = net.add_op();
@@ -58,9 +57,6 @@ static NetDef BuildNetDef(const vector<index_t> &input_shape,
   input_node_input = maxpool_op->add_node_input();
   input_node_input->set_node_id(13);
   input_node_input->set_output_port(0);
-  maxpool_op->add_out_max_byte_size(1000);
-  maxpool_op->add_out_max_byte_size(1000);
-  maxpool_op->add_out_max_byte_size(1000);
 
   // output op
   OperatorDef *output_op = net.add_op();
diff --git a/mace/dsp/test/quantized_relu_test.cc b/mace/dsp/test/quantized_relu_test.cc
index 685be71d9b51ab3a833579654da568552d310a0f..b1c8fbd099fe88acf4b2790966af91d3d45c0d37 100644
--- a/mace/dsp/test/quantized_relu_test.cc
+++ b/mace/dsp/test/quantized_relu_test.cc
@@ -16,7 +16,6 @@ static NetDef BuildNetDef() {
   input_op->set_type("INPUT");
   input_op->set_node_id(0);
   input_op->set_padding(0);
-  input_op->add_out_max_byte_size(1000);
 
   // relu op
   OperatorDef *relu_op = net.add_op();
@@ -39,9 +38,6 @@ static NetDef BuildNetDef() {
   input_node_input = relu_op->add_node_input();
   input_node_input->set_node_id(11);
   input_node_input->set_output_port(0);
-  relu_op->add_out_max_byte_size(1000);
-  relu_op->add_out_max_byte_size(1000);
-  relu_op->add_out_max_byte_size(1000);
 
   // output op
   OperatorDef *output_op = net.add_op();
diff --git a/mace/dsp/test/quantized_resize_bilinear_test.cc b/mace/dsp/test/quantized_resize_bilinear_test.cc
index 12a2f8d34b94aeb21c8d3507be4ab4b545c26c2e..6ee9f404e8894d96cdc7b97d0001cb62f396c5d6 100644
--- a/mace/dsp/test/quantized_resize_bilinear_test.cc
+++ b/mace/dsp/test/quantized_resize_bilinear_test.cc
@@ -18,7 +18,6 @@ static NetDef BuildNetDef() {
   input_op->set_type("INPUT");
   input_op->set_node_id(0);
   input_op->set_padding(0);
-  input_op->add_out_max_byte_size(1200);
 
   // relu op
   OperatorDef *resize_bilinear_op = net.add_op();
@@ -46,9 +45,6 @@ static NetDef BuildNetDef() {
   input_node_input = resize_bilinear_op->add_node_input();
   input_node_input->set_node_id(12);
   input_node_input->set_output_port(0);
-  resize_bilinear_op->add_out_max_byte_size(1200);
-  resize_bilinear_op->add_out_max_byte_size(1000);
-  resize_bilinear_op->add_out_max_byte_size(1000);
 
   // output op
   OperatorDef *output_op = net.add_op();
diff --git a/mace/dsp/test/supernode_test.cc b/mace/dsp/test/supernode_test.cc
index 634795ecf5beb6adbb3f255666537897be960436..1262299d745684e2b2641aa037d6fbefeb095dfd 100644
--- a/mace/dsp/test/supernode_test.cc
+++ b/mace/dsp/test/supernode_test.cc
@@ -17,7 +17,6 @@ static NetDef BuildNetDef() {
   input_op->set_type("INPUT");
   input_op->set_node_id(0);
   input_op->set_padding(0);
-  input_op->add_out_max_byte_size(1000);
 
   // add op
   OperatorDef *supernode_op = net.add_op();
@@ -77,10 +76,6 @@ static NetDef BuildNetDef() {
   input_node_input->set_node_id(20);
   input_node_input->set_output_port(0);
 
-  supernode_op->add_out_max_byte_size(1000);
-  supernode_op->add_out_max_byte_size(1000);
-  supernode_op->add_out_max_byte_size(1000);
-
   // output op
   OperatorDef *output_op = net.add_op();
   output_op->set_name("__output__");
diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto
index 2aa79796afd9ee97b89913c6c775261f752c3330..c1936693df3289b94e58b8aaec42121d04bb46b3 100644
--- a/mace/proto/mace.proto
+++ b/mace/proto/mace.proto
@@ -83,6 +83,7 @@ message OperatorDef {
   optional string type = 4;
   repeated Argument arg = 5;
   repeated OutputShape output_shape = 6;
+  repeated DataType output_type = 7;
 
   // Memory optimization: only support one single output op
   optional int32 mem_id = 10 [default = -1];
@@ -92,7 +93,6 @@ message OperatorDef {
   optional uint32 op_id = 101;
   optional uint32 padding = 102;
   repeated NodeInput node_input = 103;
-  repeated int32 out_max_byte_size = 104; // only support 32-bit len
 }
 
 // for memory optimization
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index f5b1f15a31652e29aa880d9b5a3f7ad126e707f6..b1bb214cb7153324924e05ddc81868c94f09b73a 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -1,9 +1,11 @@
 py_library(
     name = "tf_converter_lib",
     srcs = [
+        "convert_util.py",
+        "graph_util.py",
         "tf_converter_lib.py",
         "tf_dsp_converter_lib.py",
-        "graph_util.py"],
+    ],
     srcs_version = "PY2AND3",
     deps = [
         "//mace/proto:mace_py",
diff --git a/mace/python/tools/convert_util.py b/mace/python/tools/convert_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..53b3196952e991e6163b83ff3ca14395bcba9856
--- /dev/null
+++ b/mace/python/tools/convert_util.py
@@ -0,0 +1,29 @@
+import tensorflow as tf
+from mace.proto import mace_pb2
+
+TF_DTYPE_2_MACE_DTYPE_MAP = {  # TensorFlow dtype -> mace_pb2 DT_* constant
+    tf.float32: mace_pb2.DT_FLOAT,
+    tf.double: mace_pb2.DT_DOUBLE,
+    tf.half: mace_pb2.DT_HALF,
+    tf.int64: mace_pb2.DT_INT64,
+    tf.int32: mace_pb2.DT_INT32,
+    tf.qint32: mace_pb2.DT_INT32,  # quantized dtype shares the plain int32 entry
+    tf.int16: mace_pb2.DT_INT16,
+    tf.qint16: mace_pb2.DT_INT16,  # quantized dtype shares the plain int16 entry
+    tf.int8: mace_pb2.DT_INT8,
+    tf.qint8: mace_pb2.DT_INT8,  # quantized dtype shares the plain int8 entry
+    tf.quint16: mace_pb2.DT_UINT16,  # quantized dtype shares the uint16 entry
+    tf.uint16: mace_pb2.DT_UINT16,
+    tf.quint8: mace_pb2.DT_UINT8,  # quantized dtype shares the uint8 entry
+    tf.uint8: mace_pb2.DT_UINT8,
+    tf.string: mace_pb2.DT_STRING,
+    tf.bool: mace_pb2.DT_BOOL,
+}
+
+
+def tf_dtype_2_mace_dtype(tf_dtype):
+    """Return the MACE DataType for tf_dtype; raise Exception if unmapped."""
+    if tf_dtype not in TF_DTYPE_2_MACE_DTYPE_MAP:  # membership, not truthiness
+        raise Exception("Not supported tensorflow dtype: " + str(tf_dtype))
+    return TF_DTYPE_2_MACE_DTYPE_MAP[tf_dtype]
+
diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py
index 80b5ee4253a8de05a82435ec5f7593734b757115..f603d3b5c96e76ef52bee06911b467d9b0112a29 100644
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -1,6 +1,7 @@
 from mace.proto import mace_pb2
 import tensorflow as tf
 import numpy as np
+from mace.python.tools.convert_util import tf_dtype_2_mace_dtype
 
 # TODO: support NCHW formt, now only support NHWC.
 padding_mode = {
@@ -110,6 +111,19 @@ def add_output_transform(name, net_def):
   epsilon_arg.name = 'buffer_type'
   epsilon_arg.i = buffer_type_map['IN_OUT']
 
+
+def convert_op_outputs(mace_op_def, tf_op):
+  """Copy the names, dtypes and shapes of tf_op's outputs to mace_op_def."""
+  tensors = tf_op.outputs
+  mace_op_def.output.extend([tensor.name for tensor in tensors])
+  dtypes = [tf_dtype_2_mace_dtype(tensor.dtype) for tensor in tensors]
+  mace_op_def.output_type.extend(dtypes)
+  shapes = [mace_pb2.OutputShape() for _ in tensors]
+  for shape, tensor in zip(shapes, tensors):
+    shape.dims.extend(tensor.shape.as_list())
+  mace_op_def.output_shape.extend(shapes)
+
+
 def convert_ops(unresolved_ops, dt, net_def, device):
   ops_count = len(unresolved_ops)
   resolved_count = 1
@@ -171,13 +185,7 @@ def convert_ops(unresolved_ops, dt, net_def, device):
         final_op = relu_op
         resolved_count = 4
 
-      op_def.output.extend([output.name for output in final_op.outputs])
-      output_shapes = []
-      for output in final_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, final_op)
 
     elif first_op.type == 'FusedBatchNorm':
       op_def.name = first_op.name
@@ -225,26 +233,15 @@ def convert_ops(unresolved_ops, dt, net_def, device):
       op_def.name = first_op.name[:-4]  # remove /add
       op_def.type = 'BatchNorm'
       op_def.input.extend([input_name, gamma, beta, mean, variance, epsilon])
-      op_def.output.extend([output.name for output in add_1_op.outputs])
-      output_shapes = []
-      for output in add_1_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, add_1_op)
 
       resolved_count = 7
     elif first_op.type == 'Relu6':
       op_def.name = first_op.name
       op_def.type = 'Relu'
       op_def.input.extend([input.name for input in first_op.inputs])
-      op_def.output.extend([output.name for output in first_op.outputs])
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
+
       max_limit_arg = op_def.arg.add()
       max_limit_arg.name = 'max_limit'
       max_limit_arg.f = 6
@@ -252,13 +249,8 @@ def convert_ops(unresolved_ops, dt, net_def, device):
       op_def.name = first_op.name
       op_def.type = 'Pooling'
       op_def.input.extend([input.name for input in first_op.inputs])
-      op_def.output.extend([output.name for output in first_op.outputs])
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
+
       pooling_type_arg = op_def.arg.add()
       pooling_type_arg.name = 'pooling_type'
       pooling_type_arg.i = pooling_type_mode[first_op.type]
@@ -278,55 +270,31 @@ def convert_ops(unresolved_ops, dt, net_def, device):
       op_def.name = first_op.name
       op_def.type = "AddN"
       op_def.input.extend([input.name for input in first_op.inputs])
-      op_def.output.extend([output.name for output in first_op.outputs])
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
     elif first_op.type == 'ConcatV2':
       op_def.name = first_op.name
       op_def.type = "Concat"
       op_def.input.extend([first_op.inputs[i].name for i in xrange(2)])
-      op_def.output.extend([output.name for output in first_op.outputs])
       axis_arg = op_def.arg.add()
       axis_arg.name = 'axis'
       axis_arg.i = get_input_tensor(first_op, 2).eval().astype(np.int32)
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
     elif first_op.type == 'ResizeBilinear':
       op_def.name = first_op.name
       op_def.type = "ResizeBilinear"
       op_def.input.extend([first_op.inputs[0].name])
-      op_def.output.extend([output.name for output in first_op.outputs])
       size_arg = op_def.arg.add()
       size_arg.name = 'size'
       size_arg.ints.extend(get_input_tensor(first_op, 1).eval().astype(np.int32).flat)
       size_arg = op_def.arg.add()
       size_arg.name = 'align_corners'
       size_arg.i = first_op.get_attr('align_corners')
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
     elif first_op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND', 'BiasAdd']:
       op_def.name = first_op.name
       op_def.type = first_op.type
       op_def.input.extend([input.name for input in first_op.inputs])
-      op_def.output.extend([output.name for output in first_op.outputs])
-      output_shapes = []
-      for output in first_op.outputs:
-        output_shape = mace_pb2.OutputShape()
-        output_shape.dims.extend(output.shape.as_list())
-        output_shapes.append(output_shape)
-      op_def.output_shape.extend(output_shapes)
+      convert_op_outputs(op_def, first_op)
     else:
       raise Exception('Unknown Op: %s, type: %s' % (first_op.name, first_op.type))
       pass
diff --git a/mace/python/tools/tf_dsp_converter_lib.py b/mace/python/tools/tf_dsp_converter_lib.py
index ced16ce853e8f49b9c968e09ed257a8e3bf815b5..c485a3669c86787ee45b29873bcf08e0f54a22c6 100644
--- a/mace/python/tools/tf_dsp_converter_lib.py
+++ b/mace/python/tools/tf_dsp_converter_lib.py
@@ -3,6 +3,7 @@ import tensorflow as tf
 from operator import mul
 from dsp_ops import DspOps
 from mace.python.tools import graph_util
+from mace.python.tools.convert_util import tf_dtype_2_mace_dtype
 
 # converter --input ../libcv/quantized_icnet.pb --output quantized_icnet_dsp.pb \
 # --runtime dsp --input_node input_node --output_node output_node
@@ -65,6 +66,18 @@ def add_shape_const_node(net_def, op, values, name):
   tensor.dims.extend(values)
   return tensor.name
 
+
+def convert_op_outputs(mace_op_def, tf_op):
+  """Copy the dtypes and shapes of tf_op's outputs to mace_op_def."""
+  tensors = tf_op.outputs
+  dtypes = [tf_dtype_2_mace_dtype(tensor.dtype) for tensor in tensors]
+  mace_op_def.output_type.extend(dtypes)
+  shapes = [mace_pb2.OutputShape() for _ in tensors]
+  for shape, tensor in zip(shapes, tensors):
+    shape.dims.extend(tensor.shape.as_list())
+  mace_op_def.output_shape.extend(shapes)
+
+
 def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
   first_op = unresolved_ops[0]
   print ('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape)
@@ -119,7 +132,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
       op_def.input.append(input_tensor.name)
       op_def.input.extend([t.name for t in s2b_op.inputs[1:]])
       op_def.input.extend([min_tensor.name, max_tensor.name])
-      op_def.out_max_byte_size.extend([max_elem_size(out) for out in quantize_op.outputs])
+      convert_op_outputs(op_def, quantize_op)
     elif has_padding_and_strides(first_op):
       op_def.padding = padding_mode[first_op.get_attr('padding')]
       op_def.input.extend([t.name for t in first_op.inputs])
@@ -130,14 +143,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
       strides = first_op.get_attr('strides')
       strides_tensor = add_shape_const_node(net_def, first_op, strides, 'strides')
       op_def.input.extend([strides_tensor])
-      op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
+      convert_op_outputs(op_def, first_op)
     elif is_node_flatten_reshape(first_op):
       op_def.type = 'Flatten'
       op_def.input.extend([t.name for t in first_op.inputs])
-      op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
+      convert_op_outputs(op_def, first_op)
     elif dsp_ops.has_op(first_op.type):
       op_def.input.extend([t.name for t in first_op.inputs])
-      op_def.out_max_byte_size.extend([max_elem_size(out) for out in first_op.outputs])
+      convert_op_outputs(op_def, first_op)
     else:
       raise Exception('Unsupported op: ', first_op)
 
@@ -188,11 +201,9 @@ def reverse_batch_to_space_and_biasadd(net_def):
             new_biasadd_op.input[0] = get_tensor_name_from_op(conv_requantize_op.name, 0)
             new_biasadd_op.input[2] = get_tensor_name_from_op(conv_requantize_op.name, 1)
             new_biasadd_op.input[3] = get_tensor_name_from_op(conv_requantize_op.name, 2)
-            new_biasadd_op.out_max_byte_size[0] = conv_requantize_op.out_max_byte_size[0] * 4
 
             new_biasadd_requantize_op = mace_pb2.OperatorDef()
             new_biasadd_requantize_op.CopyFrom(biasadd_requantize_op)
-            new_biasadd_requantize_op.out_max_byte_size[0] = new_biasadd_op.out_max_byte_size[0] / 4
 
             new_b2s_op = mace_pb2.OperatorDef()
             new_b2s_op.CopyFrom(b2s_op)
@@ -309,8 +320,11 @@ def strip_input_quantize_and_output_dequantize(net_def, input_node, output_node)
         new_input_op.name = input_op.name
         new_input_op.type = input_op.type
         new_input_op.padding = input_op.padding
-        new_input_op.out_max_byte_size.extend([input_op.out_max_byte_size[0]/4, 4, 4])
         new_ops.append(new_input_op)
+        new_input_op.output_shape.extend([input_op.output_shape[0],
+                                          minf_op.output_shape[0],
+                                          maxf_op.output_shape[0]])
+        new_input_op.output_type.extend([input_op.output_type[0], mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT])
         for follow_op in consumers[get_tensor_name_from_op(quantize_op.name, 0)]:
           new_follow_op = mace_pb2.OperatorDef()
           new_follow_op.CopyFrom(follow_op)