未验证 提交 c3bcd3c1 编写于 作者: Z Zeng Jinle 提交者: GitHub

Fix dygraph bug when using a non-zero GPU device, test=develop (#22165)

上级 cfb0c12e
...@@ -33,6 +33,8 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) { ...@@ -33,6 +33,8 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
} }
Allocation* CUDAAllocator::AllocateImpl(size_t size) { Allocation* CUDAAllocator::AllocateImpl(size_t size) {
std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
void* ptr; void* ptr;
auto result = cudaMalloc(&ptr, size); auto result = cudaMalloc(&ptr, size);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
...@@ -33,6 +34,7 @@ class CUDAAllocator : public Allocator { ...@@ -33,6 +34,7 @@ class CUDAAllocator : public Allocator {
private: private:
platform::CUDAPlace place_; platform::CUDAPlace place_;
std::once_flag once_flag_;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -330,5 +330,5 @@ set_tests_properties(test_parallel_executor_test_while_train test_parallel_execu ...@@ -330,5 +330,5 @@ set_tests_properties(test_parallel_executor_test_while_train test_parallel_execu
test_parallel_executor_crf test_sync_batch_norm_op test_parallel_executor_crf test_sync_batch_norm_op
test_parallel_executor_feed_persistable_var test_parallel_executor_feed_persistable_var
test_parallel_executor_crf_auto_growth test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass test_parallel_executor_crf_auto_growth test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass
test_data_norm_op test_data_norm_op test_imperative_using_non_zero_gpu
test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST") test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import unittest
from paddle.fluid.dygraph import to_variable, Embedding, guard
import numpy as np
class TestImperativeUsingNonZeroGpu(unittest.TestCase):
    """Exercises dygraph (imperative) mode on GPU 1 before GPU 0.

    The test needs a CUDA build and at least two visible GPUs; otherwise it
    is reported as skipped rather than silently passing.
    """

    def run_main(self, np_arr, place):
        """Round-trip ``np_arr`` through a dygraph variable on ``place``.

        Args:
            np_arr: numpy array to convert with ``to_variable``.
            place: device place passed to ``guard`` (e.g. ``fluid.CUDAPlace``).
        """
        with guard(place):
            # The layer itself is never used afterwards; presumably it is
            # built only so that parameter allocation happens on `place`.
            embedding = Embedding(size=[10, 10])
            var = to_variable(np_arr)
            # The data read back from the variable must match the input exactly.
            self.assertTrue(np.array_equal(np_arr, var.numpy()))

    def test_non_zero_gpu(self):
        """Run the round-trip on ``CUDAPlace(1)`` first, then ``CUDAPlace(0)``."""
        if not fluid.is_compiled_with_cuda():
            self.skipTest("PaddlePaddle is not compiled with CUDA")
        # CUDAPlace(1) only exists when a second GPU is visible.
        if fluid.core.get_cuda_device_count() < 2:
            self.skipTest("at least 2 GPUs are required")

        np_arr = np.random.random([11, 13]).astype('float32')
        # Order matters for this test: the non-zero device is used first.
        self.run_main(np_arr, fluid.CUDAPlace(1))
        self.run_main(np_arr, fluid.CUDAPlace(0))
# Run the test case when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册