diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 6e0677dcb45dc3a9ff541d35f0d9d6e6b0794356..154ff1c87aafebde3fe28a8f380b652376651065 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -33,6 +33,8 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) { } Allocation* CUDAAllocator::AllocateImpl(size_t size) { + std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); }); + platform::CUDADeviceGuard guard(place_.device); void* ptr; auto result = cudaMalloc(&ptr, size); diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 886f6e7a327f70068c6fabb6328f927bf71b2881..ae48d26ceeb5a4611d4e140e3851eee5f11b4d5b 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include <mutex> // NOLINT #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" @@ -33,6 +34,7 @@ class CUDAAllocator : public Allocator { private: platform::CUDAPlace place_; + std::once_flag once_flag_; }; } // namespace allocation diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 7ce08b58c1d14c178bd11abeb7c73e1d49cbb346..73e0c2394a65ff37de6d845e977c75569445af96 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -330,5 +330,5 @@ set_tests_properties(test_parallel_executor_test_while_train test_parallel_execu test_parallel_executor_crf test_sync_batch_norm_op test_parallel_executor_feed_persistable_var test_parallel_executor_crf_auto_growth test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass - test_data_norm_op + test_data_norm_op test_imperative_using_non_zero_gpu 
test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST") diff --git a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..0af8132acfd26917f83dda6350583c7a06858a7d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py @@ -0,0 +1,38 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import unittest +from paddle.fluid.dygraph import to_variable, Embedding, guard +import numpy as np + + +class TestImperativeUsingNonZeroGpu(unittest.TestCase): + def run_main(self, np_arr, place): + with guard(place): + embedding = Embedding(size=[10, 10]) + var = to_variable(np_arr) + self.assertTrue(np.array_equal(np_arr, var.numpy())) + + def test_non_zero_gpu(self): + if not fluid.is_compiled_with_cuda(): + return + + np_arr = np.random.random([11, 13]).astype('float32') + self.run_main(np_arr, fluid.CUDAPlace(1)) + self.run_main(np_arr, fluid.CUDAPlace(0)) + + +if __name__ == '__main__': + unittest.main()