diff --git a/paddle/fluid/platform/device_event_cpu.h b/paddle/fluid/platform/device_event_cpu.h
index b08323d7f15066bbf763e4bdcbd71b3e0335501f..6e2bf4c7ad1356dee34e4f0c279202787a951918 100644
--- a/paddle/fluid/platform/device_event_cpu.h
+++ b/paddle/fluid/platform/device_event_cpu.h
@@ -23,7 +23,8 @@ namespace platform {
 
 struct CPUDeviceEventWrapper {
   explicit CPUDeviceEventWrapper(const platform::Place& place,
-                                 unsigned int flag = 0) {
+                                 unsigned int flag = 0)
+      : status_(EventStatus::INITIALIZED) {
     PADDLE_ENFORCE_EQ(
         platform::is_cpu_place(place), true,
         platform::errors::PreconditionNotMet(
diff --git a/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt b/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
index c1ca62629e60a55cba8bb10992ed500a4f8d100f..7692f8befdf58ceb6c0a23ebe3e2b49fc656ec3e 100644
--- a/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
@@ -1,8 +1,6 @@
 file(GLOB TEST_INTERP_CASES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_INTERP_CASES "${TEST_INTERP_CASES}")
 
-list(REMOVE_ITEM TEST_INTERP_CASES test_standalone_executor)
-
 foreach(target ${TEST_INTERP_CASES})
   py_test_modules(${target} MODULES ${target})
 endforeach()
diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
index b59fcd8d02e2f13ad78b2ce5ac8f9b66a82e1f18..1f971ae1b2508f51faf559f1cdaec59aae57bf85 100644
--- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
+++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
@@ -25,10 +25,12 @@ paddle.enable_static()
 
 class LinearTestCase(unittest.TestCase):
     def setUp(self):
-        self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda(
         ) else paddle.CPUPlace()
+        self.place = core.Place()
+        self.place.set_place(place)
 
-    def test_interp_base(self):
+    def build_program(self):
         a = paddle.static.data(name="a", shape=[2, 2], dtype='float32')
         b = paddle.ones([2, 2]) * 2
         t = paddle.static.nn.fc(a, 2)
@@ -36,11 +38,15 @@ class LinearTestCase(unittest.TestCase):
 
         main_program = paddle.fluid.default_main_program()
         startup_program = paddle.fluid.default_startup_program()
-        p = core.Place()
-        p.set_place(self.place)
-        standaloneexecutor = StandaloneExecutor(p, startup_program.desc,
-                                                main_program.desc, core.Scope())
 
+        return startup_program, main_program, c
+
+        return standaloneexecutor, c
+
+    def test_interp_base(self):
+        startup_program, main_program, c = self.build_program()
+        standaloneexecutor = StandaloneExecutor(
+            self.place, startup_program.desc, main_program.desc, core.Scope())
         out = standaloneexecutor.run({
             "a": np.ones(
                 [2, 2], dtype="float32") * 2
@@ -55,24 +61,35 @@ class LinearTestCase(unittest.TestCase):
             out = standaloneexecutor.run({
                 "a": np.ones(
                     [2, 2], dtype="float32") * i
-            }, [a.name, c.name])
+            }, ['a', c.name])
 
+    def test_dry_run(self):
+        startup_program, main_program, c = self.build_program()
+        standaloneexecutor = StandaloneExecutor(
+            self.place, startup_program.desc, main_program.desc, core.Scope())
         # test for cost_info
         cost_info = standaloneexecutor.dry_run({
             "a": np.ones(
-                [2, 2], dtype="float32") * i
+                [2, 2], dtype="float32")
         })
         self.check_cost_info(cost_info)
 
     def check_cost_info(self, cost_info):
+        IS_WINDOWS = sys.platform.startswith('win')
+
         if core.is_compiled_with_cuda():
-            # self.assertEqual(cost_info.host_memory_bytes(), 16)
-            self.assertGreater(cost_info.device_memory_bytes(), 0)
+            # input `a` is on CPU, 16 bytes
+            self.assertEqual(cost_info.host_memory_bytes(), 16)
+            # # w,bias,b, out, memory block is at least 256 bytes on Linux
+            gt = 16 * 4 if IS_WINDOWS else 256 * 4
+            self.assertGreater(cost_info.device_memory_bytes(), gt)
             self.assertGreaterEqual(cost_info.device_total_memory_bytes(),
                                     cost_info.device_memory_bytes())
         else:
-            self.assertGreater(cost_info.host_memory_bytes(), 0)
+            # x(16 bytes), w(16 bytes), bias(8 bytes), b(16 bytes), out(16 bytes)
+            self.assertGreaterEqual(cost_info.host_memory_bytes(), 72)
             self.assertEqual(cost_info.device_memory_bytes(), 0)
+            self.assertGreaterEqual(cost_info.device_total_memory_bytes(), 0)
 
 
 class MultiStreamModelTestCase(unittest.TestCase):