enable new-executor on windows to test it (#41301)

* enable new-executor on windows to test it
* add message
* fix ut

enable new-executor on windows to test it (#41301)
* enable new-executor on windows to test it
* add message
* fix ut
e59a693e · Leo Chen · GitHub · a5e00bb7 · e59a693e · e59a693e
4 changed files
--- a/paddle/phi/kernels/gpu/range_kernel.cu
+++ b/paddle/phi/kernels/gpu/range_kernel.cu
@@ -21,6 +21,19 @@
 namespace phi {
+template <typename T, typename Context>
+inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
+  T value = static_cast<T>(0);
+  if (x.place() != CPUPlace()) {
+    DenseTensor cpu_x;
+    Copy(dev_ctx, x, CPUPlace(), true, &cpu_x);
+    value = cpu_x.data<T>()[0];
+  } else {
+    value = x.data<T>()[0];
+  }
+  return value;
+}
 template <typename T>
 __global__ void Range(T start, T step, int64_t size, T* out) {
  CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
@@ -32,9 +45,9 @@ void RangeKernel(const Context& dev_ctx,
                 const DenseTensor& end,
                 const DenseTensor& step,
                 DenseTensor* out) {
-  T start_value = start.data<T>()[0];
+  T start_value = GetValue<T, Context>(dev_ctx, start);
-  T end_value = end.data<T>()[0];
+  T end_value = GetValue<T, Context>(dev_ctx, end);
-  T step_value = step.data<T>()[0];
+  T step_value = GetValue<T, Context>(dev_ctx, step);
  int64_t size = 0;
  phi::funcs::GetSize(start_value, end_value, step_value, &size);

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -394,9 +394,20 @@ def _is_enable_standalone_executor():
    Whether to use experimental executor `StandaloneExecutor`.
    """
    flag = False
+    # NOTE(zhiqiu): enable STANDALONE_EXECUTOR on windows platform by default
+    # It should be enabled on all platform in the future.
+    import platform
+    sysstr = platform.system().lower()
+    if sysstr == 'windows':
+        env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', 1)
+    else:
        env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', None)
    if env_val in [1, '1', True, 'True', 'true']:
        flag = True
+        warnings.warn("STANDALONE_EXECUTOR is enabled.")
    return flag

--- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
+++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
@@ -103,20 +103,21 @@ def check(use_cuda):
 if __name__ == '__main__':
-    if core.is_compiled_with_cuda():
    try:
-            check(use_cuda=True)
+        check(use_cuda=False)
        assert False
    except Exception as e:
        print(e)
        print(type(e))
-            # Note. Enforce in cuda kernel may not catch in paddle, and
+        assert type(e) == RuntimeError
-            # Exception type will be RuntimeError
-            assert type(e) == OSError or type(e) == RuntimeError
+    if core.is_compiled_with_cuda():
        try:
-        check(use_cuda=False)
+            check(use_cuda=True)
            assert False
        except Exception as e:
            print(e)
            print(type(e))
-        assert type(e) == RuntimeError
+            # Note. Enforce in cuda kernel may not catch in paddle, and
+            # Exception type will be RuntimeError
+            assert type(e) == OSError or type(e) == RuntimeError
--- a/python/paddle/fluid/tests/unittests/test_nan_inf.py
+++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py
@@ -47,8 +47,10 @@ class TestNanInf(unittest.TestCase):
        print(out)
        print(err)
-        assert returncode == 0
        # in python3, type(out+err) is 'bytes', need use encode
+        if paddle.fluid.core.is_compiled_with_cuda():
+            assert (out + err).find('find nan or inf==='.encode()) != -1
+        else:
            assert (out + err
                    ).find('There are `nan` or `inf` in tensor'.encode()) != -1