diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 7ad028714d3b47d93328dbf7c3297d55a2db1bd0..93aa5f908ec929a33089a62caa2186ba9e57fffe 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -299,14 +299,18 @@ class Executor(object):
         if feed is None:
             feed = {}
         if not isinstance(feed, dict):
-            raise TypeError("feed should be a map")
+            raise TypeError(
+                "feed requires dict as its Parameter. But you passed in %s" %
+                (type(feed)))
         if fetch_list is None:
             fetch_list = []
 
         if program is None:
             program = default_main_program()
         if not isinstance(program, Program):
-            raise TypeError()
+            raise TypeError(
+                "Executor requires Program as its Parameter. But you passed in %s"
+                % (type(program)))
 
         if scope is None:
             scope = global_scope()
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 0a314ddfd7c607a3bc7f7c746c4c4990fc4a52e2..0fc48055220ed84c4ab146ad01b05f393e01078e 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -47,6 +47,8 @@ class Optimizer(object):
             raise TypeError("learning rate should be float or Variable")
         self.regularization = regularization
         self._learning_rate = learning_rate
+        # the learning rate type should be inferenced from loss
+        self._dtype = None
         # each program should have a independent learning rate
         # program -> Variable(learning_rate)
         self._learning_rate_map = dict()
@@ -77,7 +79,7 @@ class Optimizer(object):
             name=unique_name.generate("learning_rate"),
             shape=[1],
             value=float(self._learning_rate),
-            dtype='float32',
+            dtype='float32' if self._dtype == None else self._dtype,
             persistable=True)
 
     def global_learning_rate(self, program=None):
@@ -200,6 +202,7 @@ class Optimizer(object):
 
         # Create any accumulators
         program = loss.block.program
+        self._dtype = loss.dtype
         with program_guard(program, startup_program):
             global_block = framework.default_main_program().global_block()
             start = len(global_block.ops)
@@ -391,7 +394,7 @@ class AdamOptimizer(Optimizer):
         beta_shape = [1]
         self._beta1_pow_acc = self.helper.create_global_variable(
             name=unique_name.generate('beta1_pow_acc'),
-            dtype='float32',
+            dtype='float32' if self._dtype == None else self._dtype,
             shape=beta_shape,
             lod_level=0,
             persistable=True)
@@ -400,7 +403,7 @@ class AdamOptimizer(Optimizer):
 
         self._beta2_pow_acc = self.helper.create_global_variable(
             name=unique_name.generate('beta2_pow_acc'),
-            dtype='float32',
+            dtype='float32' if self._dtype == None else self._dtype,
             shape=beta_shape,
             lod_level=0,
             persistable=True)
@@ -493,7 +496,7 @@ class AdamaxOptimizer(Optimizer):
         beta_shape = [1]
         self._beta1_pow_acc = self.helper.create_global_variable(
             name=unique_name.generate('beta1_pow_acc'),
-            dtype='float32',
+            dtype='float32' if self._dtype == None else self._dtype,
             shape=beta_shape,
             lod_level=0,
             persistable=True)
@@ -900,8 +903,10 @@ class ModelAverage(Optimizer):
         # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
         tmp = layers.sum(x=[num_accumulates, old_num_accumulates])
         sum = layers.sum(x=[sum_1, sum_2, sum_3])
-        tmp = layers.cast(x=tmp, dtype='float32')
-        sum = layers.cast(x=sum, dtype='float32')
+        tmp = layers.cast(
+            x=tmp, dtype='float32' if self._dtype == None else self._dtype)
+        sum = layers.cast(
+            x=sum, dtype='float32' if self._dtype == None else self._dtype)
         layers.elementwise_div(x=sum, y=tmp, out=param)
 
     def _add_average_restore_op(self, block, param_grad):
diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
new file mode 100644
index 0000000000000000000000000000000000000000..baafcdbb80238385752183ee0a8ff96a5da4659c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.executor import Executor
+
+BATCH_SIZE = 20
+
+
+class TestNetWithDtype(unittest.TestCase):
+    def setUp(self):
+        self.dtype = "float64"
+        self.init_dtype()
+        self.x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
+        self.y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
+        y_predict = fluid.layers.fc(input=self.x, size=1, act=None)
+
+        cost = fluid.layers.square_error_cost(input=y_predict, label=self.y)
+        avg_cost = fluid.layers.mean(cost)
+        self.fetch_list = [avg_cost]
+
+        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+        sgd_optimizer.minimize(avg_cost)
+
+    def run_net_on_place(self, place):
+        train_reader = paddle.batch(
+            paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)
+        feeder = fluid.DataFeeder(place=place, feed_list=[self.x, self.y])
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+        for data in train_reader():
+            exe.run(fluid.default_main_program(),
+                    feed=feeder.feed(data),
+                    fetch_list=self.fetch_list)
+            # the main program is runable, the datatype is fully supported
+            break
+
+    def init_dtype(self):
+        pass
+
+    def test_cpu(self):
+        place = fluid.CPUPlace()
+        self.run_net_on_place(place)
+
+    def test_gpu(self):
+        if not core.is_compiled_with_cuda():
+            return
+        place = fluid.CUDAPlace(0)
+        self.run_net_on_place(place)
+
+
+# TODO(dzhwinter): make sure the fp16 is runable
+# class TestFloat16(SimpleNet):
+#     def init_dtype(self):
+#         self.dtype = "float16"
+
+if __name__ == '__main__':
+    unittest.main()
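As a quick illustration of what this change enables (not part of the patch): the sketch below mirrors the new test_network_with_dtype.py, building the same regression network with a float64 loss so that the optimizer's global learning-rate variable follows loss.dtype instead of the previously hard-coded 'float32'. It swaps the uci_housing reader for random numpy data to stay self-contained; treat it as a usage sketch, not code taken from this PR.

import numpy as np
import paddle.fluid as fluid

dtype = "float64"
x = fluid.layers.data(name='x', shape=[13], dtype=dtype)
y = fluid.layers.data(name='y', shape=[1], dtype=dtype)
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)

# with this patch, minimize() records loss.dtype, so the learning-rate
# variable created by the optimizer is float64 rather than float32
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# one random mini-batch shaped to match the data layers declared above
x_data = np.random.random((20, 13)).astype(dtype)
y_data = np.random.random((20, 1)).astype(dtype)
loss_val, = exe.run(fluid.default_main_program(),
                    feed={'x': x_data, 'y': y_data},
                    fetch_list=[avg_cost])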