diff --git a/paddle/phi/kernels/gpu/range_kernel.cu b/paddle/phi/kernels/gpu/range_kernel.cu
index 65d9b45efbcdd79b885a5621eaaa0401500a2f17..d9a98f06d0795afd5235b3d868291f79f1c38afb 100644
--- a/paddle/phi/kernels/gpu/range_kernel.cu
+++ b/paddle/phi/kernels/gpu/range_kernel.cu
@@ -21,6 +21,19 @@
 
 namespace phi {
 
+// Returns element 0 of `x` as a host-side value of type T.
+template <typename T, typename Context>
+inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
+  if (x.place() != CPUPlace()) {
+    // Not on CPUPlace: stage a CPU copy (`true` presumably means blocking).
+    DenseTensor host_copy;
+    Copy(dev_ctx, x, CPUPlace(), true, &host_copy);
+    return host_copy.data<T>()[0];
+  }
+  // Already on CPUPlace: index the tensor's buffer directly.
+  return x.data<T>()[0];
+}
+
 template <typename T>
 __global__ void Range(T start, T step, int64_t size, T* out) {
   CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
@@ -32,9 +45,9 @@ void RangeKernel(const Context& dev_ctx,
                  const DenseTensor& end,
                  const DenseTensor& step,
                  DenseTensor* out) {
-  T start_value = start.data<T>()[0];
-  T end_value = end.data<T>()[0];
-  T step_value = step.data<T>()[0];
+  T start_value = GetValue<T, Context>(dev_ctx, start);
+  T end_value = GetValue<T, Context>(dev_ctx, end);
+  T step_value = GetValue<T, Context>(dev_ctx, step);
 
   int64_t size = 0;
   phi::funcs::GetSize(start_value, end_value, step_value, &size);
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index a7971763f53e1fa1ea445f8ac843a57ae00bd1c2..eb833428afa429b35bc99ebbde9bd231ab07b722 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -394,9 +394,20 @@ def _is_enable_standalone_executor():
     Whether to use experimental executor `StandaloneExecutor`.
     """
     flag = False
-    env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', None)
+    # NOTE(zhiqiu): enable STANDALONE_EXECUTOR on Windows by default.
+    # It should be enabled on all platforms in the future.
+
+    import platform
+    sysstr = platform.system().lower()
+    if sysstr == 'windows':
+        env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', 1)
+    else:
+        env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', None)
+
     if env_val in [1, '1', True, 'True', 'true']:
         flag = True
+        warnings.warn("STANDALONE_EXECUTOR is enabled.")
+
     return flag
 
 
diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
index 1c5db616306caa225690eab7580cbf1809be3dc1..13a7ff6860e4dd76497536161636cca8e63f5032 100644
--- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
+++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
@@ -103,6 +103,14 @@ def check(use_cuda):
 
 
 if __name__ == '__main__':
+    try:
+        check(use_cuda=False)
+        assert False
+    except Exception as e:
+        print(e)
+        print(type(e))
+        assert type(e) == RuntimeError
+
     if core.is_compiled_with_cuda():
         try:
             check(use_cuda=True)
@@ -113,10 +121,3 @@ if __name__ == '__main__':
             # Note. Enforce in cuda kernel may not catch in paddle, and
             # Exception type will be RuntimeError
             assert type(e) == OSError or type(e) == RuntimeError
-    try:
-        check(use_cuda=False)
-        assert False
-    except Exception as e:
-        print(e)
-        print(type(e))
-        assert type(e) == RuntimeError
diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py
index cb7e673c6ca29c7d089a9c4cdc033d3eae9cacd3..84559048a2b8a919f96ca3cbbd1ea7bec3c56ffb 100644
--- a/python/paddle/fluid/tests/unittests/test_nan_inf.py
+++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py
@@ -47,10 +47,12 @@ class TestNanInf(unittest.TestCase):
         print(out)
         print(err)
 
-        assert returncode == 0
         # in python3, type(out+err) is 'bytes', need use encode
-        assert (out + err
-                ).find('There are `nan` or `inf` in tensor'.encode()) != -1
+        if paddle.fluid.core.is_compiled_with_cuda():
+            assert (out + err).find('find nan or inf==='.encode()) != -1
+        else:
+            assert (out + err
+                    ).find('There are `nan` or `inf` in tensor'.encode()) != -1
 
     def test_nan_inf_in_static_mode(self):
         self._python_interp += " check_nan_inf_base.py"