diff --git a/paddle/phi/kernels/gpu/range_kernel.cu b/paddle/phi/kernels/gpu/range_kernel.cu index 65d9b45efbcdd79b885a5621eaaa0401500a2f17..d9a98f06d0795afd5235b3d868291f79f1c38afb 100644 --- a/paddle/phi/kernels/gpu/range_kernel.cu +++ b/paddle/phi/kernels/gpu/range_kernel.cu @@ -21,6 +21,19 @@ namespace phi { +template +inline T GetValue(const Context& dev_ctx, const DenseTensor& x) { + T value = static_cast(0); + if (x.place() != CPUPlace()) { + DenseTensor cpu_x; + Copy(dev_ctx, x, CPUPlace(), true, &cpu_x); + value = cpu_x.data()[0]; + } else { + value = x.data()[0]; + } + return value; +} + template __global__ void Range(T start, T step, int64_t size, T* out) { CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; } @@ -32,9 +45,9 @@ void RangeKernel(const Context& dev_ctx, const DenseTensor& end, const DenseTensor& step, DenseTensor* out) { - T start_value = start.data()[0]; - T end_value = end.data()[0]; - T step_value = step.data()[0]; + T start_value = GetValue(dev_ctx, start); + T end_value = GetValue(dev_ctx, end); + T step_value = GetValue(dev_ctx, step); int64_t size = 0; phi::funcs::GetSize(start_value, end_value, step_value, &size); diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index a7971763f53e1fa1ea445f8ac843a57ae00bd1c2..eb833428afa429b35bc99ebbde9bd231ab07b722 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -394,9 +394,20 @@ def _is_enable_standalone_executor(): Whether to use experimental executor `StandaloneExecutor`. """ flag = False - env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', None) + # NOTE(zhiqiu): enable STANDALONE_EXECUTOR on windows platform by default + # It should be enabled on all platform in the future. + + import platform + sysstr = platform.system().lower() + if sysstr == 'windows': + env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', 1) + else: + env_val = os.environ.get('FLAGS_USE_STANDALONE_EXECUTOR', None) + if env_val in [1, '1', True, 'True', 'true']: flag = True + warnings.warn("STANDALONE_EXECUTOR is enabled.") + return flag diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index 1c5db616306caa225690eab7580cbf1809be3dc1..13a7ff6860e4dd76497536161636cca8e63f5032 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -103,6 +103,14 @@ def check(use_cuda): if __name__ == '__main__': + try: + check(use_cuda=False) + assert False + except Exception as e: + print(e) + print(type(e)) + assert type(e) == RuntimeError + if core.is_compiled_with_cuda(): try: check(use_cuda=True) @@ -113,10 +121,3 @@ if __name__ == '__main__': # Note. Enforce in cuda kernel may not catch in paddle, and # Exception type will be RuntimeError assert type(e) == OSError or type(e) == RuntimeError - try: - check(use_cuda=False) - assert False - except Exception as e: - print(e) - print(type(e)) - assert type(e) == RuntimeError diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index cb7e673c6ca29c7d089a9c4cdc033d3eae9cacd3..84559048a2b8a919f96ca3cbbd1ea7bec3c56ffb 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -47,10 +47,12 @@ class TestNanInf(unittest.TestCase): print(out) print(err) - assert returncode == 0 # in python3, type(out+err) is 'bytes', need use encode - assert (out + err - ).find('There are `nan` or `inf` in tensor'.encode()) != -1 + if paddle.fluid.core.is_compiled_with_cuda(): + assert (out + err).find('find nan or inf==='.encode()) != -1 + else: + assert (out + err + ).find('There are `nan` or `inf` in tensor'.encode()) != -1 def test_nan_inf_in_static_mode(self): self._python_interp += " check_nan_inf_base.py"