From 0d2235aadf87a22773d6ffe8322126715f42d3aa Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 25 Dec 2017 12:44:31 +0800 Subject: [PATCH] GPUPlace to CUDAPlace (#6960) --- paddle/framework/init.cc | 2 +- paddle/framework/lod_tensor.cc | 2 +- paddle/framework/lod_tensor_test.cu | 2 +- paddle/framework/op_kernel_type_test.cc | 4 +-- paddle/framework/op_registry.h | 2 +- paddle/framework/tensor.md | 2 +- paddle/framework/tensor_impl.h | 6 ++-- paddle/framework/tensor_test.cc | 16 +++++------ paddle/framework/tensor_util.h | 18 ++++++------ paddle/framework/tensor_util_test.cc | 6 ++-- paddle/memory/README.md | 10 +++---- paddle/memory/memcpy.cc | 24 ++++++---------- paddle/memory/memory.cc | 8 +++--- paddle/memory/memory_test.cc | 6 ++-- paddle/operators/accuracy_op.cu | 2 +- paddle/operators/batch_norm_op.cu.cc | 4 +-- paddle/operators/conv_cudnn_op.cu.cc | 8 +++--- .../operators/conv_transpose_cudnn_op.cu.cc | 8 +++--- paddle/operators/detail/strided_memcpy.h | 2 +- paddle/operators/linear_chain_crf_op.h | 8 +++--- paddle/operators/lookup_table_op.cu | 2 +- paddle/operators/lstm_unit_op.cu | 4 +-- paddle/operators/math/im2col_test.cc | 3 +- paddle/operators/math/math_function.cu | 8 +++--- paddle/operators/math/math_function_test.cu | 10 +++---- .../operators/math/selected_rows_functor.cu | 12 ++++---- .../math/selected_rows_functor_test.cu | 4 +-- paddle/operators/math/vol2col_test.cc | 2 +- paddle/operators/multiplex_op.cu | 4 +-- paddle/operators/nccl_op.cu.cc | 6 ++-- paddle/operators/nccl_op_test.cu.cc | 18 ++++++------ paddle/operators/pool_cudnn_op.cu.cc | 4 +-- paddle/operators/reshape_op.cu | 4 +-- paddle/operators/strided_memcpy_test.cc | 4 +-- paddle/operators/top_k_op.cu | 2 +- paddle/platform/device_context.cc | 10 +++---- paddle/platform/device_context.h | 6 ++-- paddle/platform/device_context_test.cu | 18 ++++++------ paddle/platform/nccl_test.cu | 4 +-- paddle/platform/place.cc | 8 ++++-- paddle/platform/place.h | 28 ++++++++++--------- paddle/platform/place_test.cc | 6 ++-- paddle/platform/transform_test.cu | 4 +-- paddle/pybind/pybind.cc | 16 +++++------ paddle/pybind/tensor_py.h | 4 +-- paddle/testing/paddle_gtest_main.cc | 2 +- python/paddle/v2/fluid/__init__.py | 4 +-- python/paddle/v2/fluid/executor.py | 2 +- .../tests/book/test_recommender_system.py | 2 +- python/paddle/v2/fluid/tests/op_test.py | 4 +-- .../paddle/v2/fluid/tests/test_adagrad_op.py | 2 +- .../v2/fluid/tests/test_batch_norm_op.py | 4 +-- .../v2/fluid/tests/test_gaussian_random_op.py | 2 +- python/paddle/v2/fluid/tests/test_profiler.py | 2 +- python/paddle/v2/fluid/tests/test_sgd_op.py | 2 +- .../v2/fluid/tests/test_uniform_random_op.py | 2 +- 56 files changed, 179 insertions(+), 180 deletions(-) diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index 4deb4fa90..3ff2da344 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -54,7 +54,7 @@ bool InitDevices(const std::vector &devices) { #ifdef PADDLE_WITH_CUDA auto pos = string::RFind(p, ':', string::Piece::npos); auto number = device.substr(pos + 1); - places.emplace_back(platform::GPUPlace(std::stoi(number))); + places.emplace_back(platform::CUDAPlace(std::stoi(number))); #else LOG(WARNING) << "'GPU' is not supported, Please re-compile with WITH_GPU option"; diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 465f8c62b..d766d3c41 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -224,7 +224,7 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor, while (size != 0) { size_t size_to_write = std::min(kBufSize, static_cast(size)); memory::Copy(cpu, buf.get(), - boost::get(tensor.place()), + boost::get(tensor.place()), reinterpret_cast(data), size_to_write, gpu_dev_ctx.stream()); gpu_dev_ctx.Wait(); diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 5b90fbfca..e8508ad26 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -27,7 +27,7 @@ __global__ void test(size_t* a, int size) { TEST(LoDTensor, LoDInGPU) { paddle::framework::LoDTensor lod_tensor; - paddle::platform::GPUPlace place(0); + paddle::platform::CUDAPlace place(0); paddle::framework::LoD src_lod; src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); diff --git a/paddle/framework/op_kernel_type_test.cc b/paddle/framework/op_kernel_type_test.cc index 899676b5c..8753d7cc3 100644 --- a/paddle/framework/op_kernel_type_test.cc +++ b/paddle/framework/op_kernel_type_test.cc @@ -37,13 +37,13 @@ TEST(OpKernelType, Hash) { using OpKernelType = paddle::framework::OpKernelType; using DataType = paddle::framework::proto::DataType; using CPUPlace = paddle::platform::CPUPlace; - using GPUPlace = paddle::platform::GPUPlace; + using CUDAPlace = paddle::platform::CUDAPlace; using DataLayout = paddle::framework::DataLayout; using LibraryType = paddle::framework::LibraryType; OpKernelType op_kernel_type_1(DataType::FP32, CPUPlace(), DataLayout::kNCHW, LibraryType::kCUDNN); - OpKernelType op_kernel_type_2(DataType::FP32, GPUPlace(0), DataLayout::kNCHW, + OpKernelType op_kernel_type_2(DataType::FP32, CUDAPlace(0), DataLayout::kNCHW, LibraryType::kCUDNN); OpKernelType::Hash hasher; diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 244c11746..9bb2a3b5c 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -188,7 +188,7 @@ class OpKernelRegistrar : public Registrar { } #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \ - REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::GPUPlace, __VA_ARGS__) + REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__) #define REGISTER_OP_CPU_KERNEL(op_type, ...) \ REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) diff --git a/paddle/framework/tensor.md b/paddle/framework/tensor.md index 7a80816d8..0a27ac9bb 100644 --- a/paddle/framework/tensor.md +++ b/paddle/framework/tensor.md @@ -71,7 +71,7 @@ private: ``` ```c++ -typedef boost::variant Place; +typedef boost::variant Place; typedef boost::variant, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>, Dim<8>, Dim<9>> DDimVar; typedef boost::variant< diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index aba1f9f09..3d93b7808 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -125,11 +125,11 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { boost::get(place), size, type)); } else if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); } #else - holder_.reset(new PlaceholderImpl( - boost::get(place), size, type)); + holder_.reset(new PlaceholderImpl( + boost::get(place), size, type)); } #endif offset_ = 0; diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index ceca64365..f347981f2 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -80,20 +80,20 @@ TEST(Tensor, MutableData) { float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), CUDAPlace()); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + p2 = src_tensor.mutable_data(make_ddim({3, 4}), CUDAPlace()); EXPECT_NE(p2, nullptr); EXPECT_NE(p1, p2); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), CUDAPlace()); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + p2 = src_tensor.mutable_data(make_ddim({2, 2}), CUDAPlace()); EXPECT_EQ(p1, p2); } #endif @@ -130,7 +130,7 @@ TEST(Tensor, ShareDataWith) { { Tensor src_tensor; Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + src_tensor.mutable_data(make_ddim({2, 3, 4}), CUDAPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } @@ -166,7 +166,7 @@ TEST(Tensor, Slice) { #ifdef PADDLE_WITH_CUDA { Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); + src_tensor.mutable_data(make_ddim({6, 9}), CUDAPlace()); Tensor slice_tensor = src_tensor.Slice(2, 6); DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 2); @@ -176,11 +176,11 @@ TEST(Tensor, Slice) { uintptr_t src_data_address = reinterpret_cast(src_tensor.data()); uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + src_tensor.mutable_data(src_tensor.dims(), CUDAPlace())); uintptr_t slice_data_address = reinterpret_cast(slice_tensor.data()); uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + slice_tensor.mutable_data(slice_tensor.dims(), CUDAPlace())); EXPECT_EQ(src_data_address, src_mutable_data_address); EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index 4e34b90d5..5b474e4ae 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -47,11 +47,11 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(src_place) && // NOLINT platform::is_cpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); + auto src_gpu_place = boost::get(src_place); auto dst_cpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); memory::Copy( dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, @@ -59,21 +59,21 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, } else if (platform::is_cpu_place(src_place) && platform::is_gpu_place(dst_place)) { auto src_cpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); + auto dst_gpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); memory::Copy( dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, reinterpret_cast(ctx).stream()); } else if (platform::is_gpu_place(src_place) && platform::is_gpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); + auto src_gpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); memory::Copy( dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, @@ -108,7 +108,7 @@ inline void CopyFromVector(const std::vector& src, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(dst_place)) { // NOLINT memory::Copy( - boost::get(dst_place), dst_ptr, src_place, src_ptr, + boost::get(dst_place), dst_ptr, src_place, src_ptr, size, reinterpret_cast(ctx).stream()); } @@ -141,7 +141,7 @@ inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(src.place())) { // NOLINT memory::Copy( - dst_place, dst_ptr, boost::get(src.place()), + dst_place, dst_ptr, boost::get(src.place()), src_ptr, size, reinterpret_cast(ctx).stream()); } diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index 03a70de18..3afb98a4a 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -58,7 +58,7 @@ TEST(CopyFrom, Tensor) { memcpy(src_ptr, arr, 9 * sizeof(int)); // CPU Tensor to GPU Tensor - auto gpu_place = new platform::GPUPlace(0); + auto gpu_place = new platform::CUDAPlace(0); platform::CUDADeviceContext gpu_ctx(*gpu_place); CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); @@ -143,7 +143,7 @@ TEST(CopyFromVector, Tensor) { // Copy to GPUTensor gpu_tensor.Resize(make_ddim({3, 3})); - auto gpu_place = new paddle::platform::GPUPlace(); + auto gpu_place = new paddle::platform::CUDAPlace(); CUDADeviceContext gpu_ctx(*gpu_place); CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); // Copy from GPU to CPU tensor for comparison @@ -210,7 +210,7 @@ TEST(CopyToVector, Tensor) { { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; Tensor gpu_tensor; - GPUPlace place; + CUDAPlace place; CUDADeviceContext gpu_ctx(place); CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); diff --git a/paddle/memory/README.md b/paddle/memory/README.md index 6cb003c50..7cf61d089 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -12,13 +12,13 @@ p = memory::Alloc(platform::CPUPlace(), 4*1024); To allocate 4KB memory on the 3rd GPU: ```cpp -p = memory::Alloc(platform::GPUPlace(2), 4*1024); +p = memory::Alloc(platform::CUDAPlace(2), 4*1024); ``` To free memory and check the so-far used amount of memory on a place: ```cpp -auto pl = platform::GPUPlace(0); +auto pl = platform::CUDAPlace(0); p = memory::Alloc(pl, 4*1024); cout << memory::Used(pl); memory::Free(pl, p); @@ -36,7 +36,7 @@ template size_t Used(Place); } // namespace memory ``` -These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: +These function templates have specializations on either `platform::CPUPlace` or `platform::CUDAPlace`: ```cpp template<> @@ -49,7 +49,7 @@ and ```cpp template<> -void Alloc(GPUPlace p, size_t size) { +void Alloc(CUDAPlace p, size_t size) { return GetGPUBuddyAllocator(p.id)->Alloc(size); } ``` @@ -122,7 +122,7 @@ There are two implementations of `Context`: 1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. -1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::CUDAPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. ### Majel diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index 5c629dc3d..b46141aaf 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -28,31 +28,25 @@ void Copy(platform::CPUPlace, void* dst, #ifdef PADDLE_WITH_CUDA template <> -void Copy(platform::CPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CPUPlace dst_place, void* dst, platform::CUDAPlace src_place, + const void* src, size_t num, cudaStream_t stream) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream); } template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::CPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CUDAPlace dst_place, void* dst, platform::CPUPlace src_place, + const void* src, size_t num, cudaStream_t stream) { platform::SetDeviceId(dst_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream); } template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CUDAPlace dst_place, void* dst, platform::CUDAPlace src_place, + const void* src, size_t num, cudaStream_t stream) { if (dst_place == src_place) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream); diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 9cafdfda7..c4bb6baee 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -83,12 +83,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { } template <> -size_t Used(platform::GPUPlace place) { +size_t Used(platform::CUDAPlace place) { return GetGPUBuddyAllocator(place.device)->Used(); } template <> -void* Alloc(platform::GPUPlace place, size_t size) { +void* Alloc(platform::CUDAPlace place, size_t size) { auto* buddy_allocator = GetGPUBuddyAllocator(place.device); auto* ptr = buddy_allocator->Alloc(size); if (ptr == nullptr) { @@ -101,14 +101,14 @@ void* Alloc(platform::GPUPlace place, size_t size) { LOG(WARNING) << "total " << total; LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize(); LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize(); - LOG(WARNING) << "GPU memory used: " << Used(place); + LOG(WARNING) << "GPU memory used: " << Used(place); platform::SetDeviceId(cur_dev); } return ptr; } template <> -void Free(platform::GPUPlace place, void* p) { +void Free(platform::CUDAPlace place, void* p) { GetGPUBuddyAllocator(place.device)->Free(p); } diff --git a/paddle/memory/memory_test.cc b/paddle/memory/memory_test.cc index 2444931e2..f476bf712 100644 --- a/paddle/memory/memory_test.cc +++ b/paddle/memory/memory_test.cc @@ -82,7 +82,7 @@ TEST(BuddyAllocator, CPUMultAlloc) { #ifdef PADDLE_WITH_CUDA -size_t align(size_t size, paddle::platform::GPUPlace place) { +size_t align(size_t size, paddle::platform::CUDAPlace place) { size += sizeof(paddle::memory::detail::Metadata); size_t alignment = paddle::platform::GpuMinChunkSize(); size_t remaining = size % alignment; @@ -94,7 +94,7 @@ TEST(BuddyAllocator, GPUAllocation) { EXPECT_EQ(p, nullptr); - paddle::platform::GPUPlace gpu(0); + paddle::platform::CUDAPlace gpu(0); p = paddle::memory::Alloc(gpu, 4096); EXPECT_NE(p, nullptr); @@ -103,7 +103,7 @@ TEST(BuddyAllocator, GPUAllocation) { } TEST(BuddyAllocator, GPUMultAlloc) { - paddle::platform::GPUPlace gpu; + paddle::platform::CUDAPlace gpu; std::unordered_map ps; diff --git a/paddle/operators/accuracy_op.cu b/paddle/operators/accuracy_op.cu index dd51aad10..0aadd5af4 100644 --- a/paddle/operators/accuracy_op.cu +++ b/paddle/operators/accuracy_op.cu @@ -56,7 +56,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* inference = ctx.Input("Out"); auto* indices = ctx.Input("Indices"); auto* label = ctx.Input("Label"); diff --git a/paddle/operators/batch_norm_op.cu.cc b/paddle/operators/batch_norm_op.cu.cc index 55d0736a4..3d17725ab 100644 --- a/paddle/operators/batch_norm_op.cu.cc +++ b/paddle/operators/batch_norm_op.cu.cc @@ -53,7 +53,7 @@ class BatchNormKernel public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); double epsilon = static_cast(ctx.Attr("epsilon")); const float momentum = ctx.Attr("momentum"); const bool is_test = ctx.Attr("is_test"); @@ -179,7 +179,7 @@ class BatchNormGradKernel public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); double epsilon = static_cast(ctx.Attr("epsilon")); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc index 3da0a9001..79e020b75 100644 --- a/paddle/operators/conv_cudnn_op.cu.cc +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -36,7 +36,7 @@ class CudnnConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -130,7 +130,7 @@ class CudnnConvOpKernel : public framework::OpKernel { handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, cudnn_output_desc, algo, &workspace_size_in_bytes)); // Allocate on GPU memory - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv forward --------------------- T alpha = 1.0f, beta = 0.0f; @@ -151,7 +151,7 @@ class CudnnConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -277,7 +277,7 @@ class CudnnConvGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- // Already on GPU void* cudnn_workspace = nullptr; - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv backward data --------------------- T alpha = 1.0f, beta = 0.0f; diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc index f0297f6c4..b3663209f 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -35,7 +35,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -100,7 +100,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { cudnn_output_desc, algo, &workspace_size_in_bytes)); // Allocate on GPU memory - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv transpose forward --------------------- @@ -120,7 +120,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -201,7 +201,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- // Already on GPU void* cudnn_workspace = nullptr; - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv backward data --------------------- // FIXME(typhoonzero): template type T may not be the same as cudnn call. diff --git a/paddle/operators/detail/strided_memcpy.h b/paddle/operators/detail/strided_memcpy.h index 068c82f39..b81bb8ba7 100644 --- a/paddle/operators/detail/strided_memcpy.h +++ b/paddle/operators/detail/strided_memcpy.h @@ -35,7 +35,7 @@ struct StridedMemcpyFunctor { memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head); } else { #ifdef PADDLE_WITH_CUDA - auto& gpu_place = boost::get(place); + auto& gpu_place = boost::get(place); auto& cuda_ctx = reinterpret_cast(dev_ctx); memory::Copy(gpu_place, dst, gpu_place, src, sizeof(T) * dst_dim.head, diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index 694584e79..19c6715ec 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -219,8 +219,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { // operators runs on GPU device. auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { - dst->mutable_data(platform::GPUPlace()); - framework::CopyFrom(src, platform::GPUPlace(), ctx, dst); + dst->mutable_data(platform::CUDAPlace()); + framework::CopyFrom(src, platform::CUDAPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -433,8 +433,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor* src, Tensor* dst) { if (src && dst) { - dst->mutable_data(platform::GPUPlace()); - framework::CopyFrom(*src, platform::GPUPlace(), ctx, dst); + dst->mutable_data(platform::CUDAPlace()); + framework::CopyFrom(*src, platform::CUDAPlace(), ctx, dst); } }; copyTensor(ctx, emission_grad_src, emission_grad_dst); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 9431030a5..a3ab1a729 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -101,7 +101,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { // copy GPU memory to CPU pinned memory framework::Vector new_rows; new_rows.resize(ids_dim[0]); - auto gpu_place = boost::get(context.GetPlace()); + auto gpu_place = boost::get(context.GetPlace()); memory::Copy(platform::CPUPlace(), new_rows.data(), gpu_place, ids_data, ids_dim[0] * sizeof(int64_t), stream); diff --git a/paddle/operators/lstm_unit_op.cu b/paddle/operators/lstm_unit_op.cu index 291f2c295..4b164d964 100644 --- a/paddle/operators/lstm_unit_op.cu +++ b/paddle/operators/lstm_unit_op.cu @@ -98,7 +98,7 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* x_tensor = ctx.Input("X"); auto* c_prev_tensor = ctx.Input("C_prev"); @@ -129,7 +129,7 @@ class LstmUnitGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto x_tensor = ctx.Input("X"); auto c_prev_tensor = ctx.Input("C_prev"); diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 256f3bc9b..26c038e43 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -159,6 +159,7 @@ void testIm2col() { TEST(math, im2col) { testIm2col(); #ifdef PADDLE_WITH_CUDA - testIm2col(); + testIm2col(); #endif } diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 7852bb53a..0a818bc5d 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -105,7 +105,7 @@ void matmul( PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && platform::is_gpu_place(matrix_b.place()) && platform::is_gpu_place(matrix_out->place()), - "Matrix must all be in GPUPlace"); + "Matrix must all be in CUDAPlace"); int M = dim_out[0]; int N = dim_out[1]; @@ -134,7 +134,7 @@ void matmul( PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && platform::is_gpu_place(matrix_b.place()) && platform::is_gpu_place(matrix_out->place()), - "Matrix must all be in GPUPlace"); + "Matrix must all be in CUDAPlace"); int M = dim_out[0]; int N = dim_out[1]; @@ -266,7 +266,7 @@ struct TensorSetConstantGPU { }; template <> -void set_constant_with_place( +void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, float value) { framework::VisitDataType(framework::ToDataType(tensor->type()), @@ -277,7 +277,7 @@ template <> void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, float value) { - set_constant_with_place(context, tensor, value); + set_constant_with_place(context, tensor, value); } template struct RowwiseAdd; diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu index 32e96d948..4325a7966 100644 --- a/paddle/operators/math/math_function_test.cu +++ b/paddle/operators/math/math_function_test.cu @@ -13,7 +13,7 @@ TEST(math_function, notrans_mul_trans) { float arr[6] = {0, 1, 2, 3, 4, 5}; memcpy(input1_ptr, arr, 6 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -47,7 +47,7 @@ TEST(math_function, trans_mul_notrans) { float arr[6] = {0, 1, 2, 3, 4, 5}; memcpy(input1_ptr, arr, 6 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -96,7 +96,7 @@ TEST(math_function, gemm_notrans_cublas) { float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7}; memcpy(input3_ptr, arr3, 8 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -151,7 +151,7 @@ TEST(math_function, gemm_trans_cublas) { float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7}; memcpy(input3_ptr, arr3, 8 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -189,7 +189,7 @@ void GemvTest(int m, int n, bool trans) { T* data_b = vec_b.mutable_data({trans ? m : n}, *cpu_place); T* data_c = vec_c.mutable_data({trans ? n : m}, *cpu_place); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::framework::Tensor g_mat_a; paddle::framework::Tensor g_vec_b; paddle::framework::Tensor g_vec_c; diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index c44577e00..9fddd97a3 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -58,15 +58,15 @@ struct SelectedRowsAdd { PADDLE_ENFORCE(platform::is_gpu_place(out_place)); memory::Copy( - boost::get(out_place), out_data, - boost::get(in1_place), in1_data, + boost::get(out_place), out_data, + boost::get(in1_place), in1_data, in1_value.numel() * sizeof(T), reinterpret_cast(context).stream()); auto* in2_data = in2_value.data(); - memory::Copy(boost::get(out_place), + memory::Copy(boost::get(out_place), out_data + in1_value.numel(), - boost::get(in2_place), in2_data, + boost::get(in2_place), in2_data, in2_value.numel() * sizeof(T), context.stream()); } }; @@ -160,9 +160,9 @@ struct SelectedRowsAddTo { auto* in1_data = in1_value.data(); auto* in2_data = in2_value->data(); - memory::Copy(boost::get(in2_place), + memory::Copy(boost::get(in2_place), in2_data + input2_offset, - boost::get(in1_place), in1_data, + boost::get(in1_place), in1_data, in1_value.numel() * sizeof(T), context.stream()); } }; diff --git a/paddle/operators/math/selected_rows_functor_test.cu b/paddle/operators/math/selected_rows_functor_test.cu index 777caf563..0a2e36f68 100644 --- a/paddle/operators/math/selected_rows_functor_test.cu +++ b/paddle/operators/math/selected_rows_functor_test.cu @@ -21,7 +21,7 @@ TEST(selected_rows_functor, gpu_add) { using namespace paddle::platform; using namespace paddle::operators::math; - GPUPlace gpu_place(0); + CUDAPlace gpu_place(0); CPUPlace cpu_place; CUDADeviceContext ctx(gpu_place); SetConstant functor; @@ -119,7 +119,7 @@ TEST(selected_rows_functor, gpu_add_to) { using namespace paddle::platform; using namespace paddle::operators::math; - GPUPlace gpu_place(0); + CUDAPlace gpu_place(0); CPUPlace cpu_place; CUDADeviceContext ctx(gpu_place); SetConstant functor; diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index f46db3c56..3794f0e52 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -122,6 +122,6 @@ TEST(math, vol2col) { testVol2col(); #ifdef PADDLE_WITH_CUDA testVol2col(); + paddle::platform::CUDAPlace>(); #endif // PADDLE_WITH_CUDA } diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu index 47986e9ff..57e6880b4 100644 --- a/paddle/operators/multiplex_op.cu +++ b/paddle/operators/multiplex_op.cu @@ -36,7 +36,7 @@ class MultiplexGPUKernel : public framework::OpKernel { CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); - platform::GPUPlace place = boost::get(ctx.GetPlace()); + platform::CUDAPlace place = boost::get(ctx.GetPlace()); for (auto i = 0; i < rows; i++) { int32_t k = index[i]; PADDLE_ENFORCE_GE(k, 0, "index must be nonnegative."); @@ -73,7 +73,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel { auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); - platform::GPUPlace place = boost::get(ctx.GetPlace()); + platform::CUDAPlace place = boost::get(ctx.GetPlace()); for (auto i = 0; i < rows; i++) { size_t k = static_cast(index[i]); if (d_ins[k]) { diff --git a/paddle/operators/nccl_op.cu.cc b/paddle/operators/nccl_op.cu.cc index 6ca6db725..1b986a136 100644 --- a/paddle/operators/nccl_op.cu.cc +++ b/paddle/operators/nccl_op.cu.cc @@ -67,7 +67,7 @@ class NCCLAllReduceKernel : public framework::OpKernel { auto stream = ctx.cuda_device_context().stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); for (size_t i = 0; i < ins.size(); ++i) { @@ -120,7 +120,7 @@ class NCCLReduceKernel : public framework::OpKernel { ctx.device_context()) .stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); auto ins_names = ctx.Inputs("X"); @@ -164,7 +164,7 @@ class NCCLBcastKernel : public framework::OpKernel { ctx.device_context()) .stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); if (idx == root) { diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index b6e4ccb73..361bfa8d7 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -52,7 +52,7 @@ class NCCLTester : public ::testing::Test { virtual void SetUp() override { paddle::platform::CPUPlace cpu_place; for (size_t i = 0; i < gpu_list.size(); ++i) { - p::GPUPlace place(i); + p::CUDAPlace place(i); dev_ctxs.emplace_back(new p::CUDADeviceContext(place)); } @@ -87,7 +87,7 @@ class NCCLTester : public ::testing::Test { std::unique_lock lk(mu); const f::OpDesc *op1 = &op_desc; - p::GPUPlace place(gpu_id); + p::CUDAPlace place(gpu_id); auto &ctx = dev_ctxs.at(gpu_id); auto *send_tensor = scope->Var("st")->GetMutable(); @@ -171,7 +171,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { for (size_t i = 0; i < dev_scopes.size(); ++i) { p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[i]); + p::CUDAPlace gpu_place(gpu_list[i]); auto &recv_tensor = dev_scopes[i]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -180,7 +180,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[i]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[i]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[i])->stream()); @@ -219,7 +219,7 @@ TEST_F(NCCLTester, ncclReduceOp) { float result = std::accumulate(gpu_list.begin(), gpu_list.end(), 0); p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[kRoot]); + p::CUDAPlace gpu_place(gpu_list[kRoot]); auto &recv_tensor = dev_scopes[kRoot]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -229,7 +229,7 @@ TEST_F(NCCLTester, ncclReduceOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[kRoot]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[kRoot]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[kRoot])->stream()); @@ -268,7 +268,7 @@ TEST_F(NCCLTester, ncclBcastOp) { float result = kRoot; p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[idx]); + p::CUDAPlace gpu_place(gpu_list[idx]); auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -277,7 +277,7 @@ TEST_F(NCCLTester, ncclBcastOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[idx]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[idx]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[idx])->stream()); @@ -300,7 +300,7 @@ int main(int argc, char **argv) { places.emplace_back(paddle::platform::CPUPlace()); int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; ++i) { - places.emplace_back(paddle::platform::GPUPlace(i)); + places.emplace_back(paddle::platform::CUDAPlace(i)); gpu_list.emplace_back(i); } diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc index fc2b37bd0..2d0001ba1 100644 --- a/paddle/operators/pool_cudnn_op.cu.cc +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -29,7 +29,7 @@ class PoolCudnnOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); Tensor *output = ctx.Output("Out"); @@ -90,7 +90,7 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); const Tensor *output = ctx.Input("Out"); diff --git a/paddle/operators/reshape_op.cu b/paddle/operators/reshape_op.cu index b7329238c..a5dcd2ec9 100644 --- a/paddle/operators/reshape_op.cu +++ b/paddle/operators/reshape_op.cu @@ -16,7 +16,7 @@ REGISTER_OP_CUDA_KERNEL( reshape, - paddle::operators::ReshapeKernel); + paddle::operators::ReshapeKernel); REGISTER_OP_CUDA_KERNEL( reshape_grad, - paddle::operators::ReshapeGradKernel); + paddle::operators::ReshapeGradKernel); diff --git a/paddle/operators/strided_memcpy_test.cc b/paddle/operators/strided_memcpy_test.cc index 230cc1ab0..d47fd98d0 100644 --- a/paddle/operators/strided_memcpy_test.cc +++ b/paddle/operators/strided_memcpy_test.cc @@ -82,7 +82,7 @@ TEST(StridedMemcpy, GPUCrop) { }; // clang-format on - platform::GPUPlace gpu0(0); + platform::CUDAPlace gpu0(0); platform::CPUPlace cpu; platform::CUDADeviceContext ctx(gpu0); @@ -121,7 +121,7 @@ TEST(StridedMemcpy, GPUConcat) { }; // clang-format on - platform::GPUPlace gpu0(0); + platform::CUDAPlace gpu0(0); platform::CPUPlace cpu; platform::CUDADeviceContext ctx(gpu0); diff --git a/paddle/operators/top_k_op.cu b/paddle/operators/top_k_op.cu index 453bd0726..0a70ad87e 100644 --- a/paddle/operators/top_k_op.cu +++ b/paddle/operators/top_k_op.cu @@ -283,7 +283,7 @@ class TopkOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index a28e9de71..8ee0f18e6 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -58,10 +58,10 @@ DeviceContextPool::DeviceContextPool( #ifdef PADDLE_WITH_CUDA device_contexts_.emplace(places[i], new platform::CUDADeviceContext( - boost::get(places[i]))); + boost::get(places[i]))); #else PADDLE_THROW( - "'GPUPlace' is not supported, Please re-compile with WITH_GPU " + "'CUDAPlace' is not supported, Please re-compile with WITH_GPU " "option"); #endif } @@ -91,7 +91,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { } ~EigenCudaStreamDevice() override {} - void Reinitialize(const cudaStream_t* cuda_stream, GPUPlace place) { + void Reinitialize(const cudaStream_t* cuda_stream, CUDAPlace place) { stream_ = cuda_stream; place_ = place; device_prop_ = &Eigen::m_deviceProperties[place.device]; @@ -130,14 +130,14 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { } private: - GPUPlace place_; + CUDAPlace place_; const cudaStream_t* stream_; // not owned; const cudaDeviceProp* device_prop_; // not owned; mutable void* scratch_; mutable unsigned int* semaphore_; }; -CUDADeviceContext::CUDADeviceContext(GPUPlace place) : place_(place) { +CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) { SetDeviceId(place_.device); PADDLE_ENFORCE(cudaStreamCreate(&stream_)); eigen_stream_.reset(new EigenCudaStreamDevice()); diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 9b958f7c9..877a66363 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -58,7 +58,7 @@ class EigenCudaStreamDevice; class CUDADeviceContext : public DeviceContext { public: - explicit CUDADeviceContext(GPUPlace place); + explicit CUDADeviceContext(CUDAPlace place); virtual ~CUDADeviceContext(); /*! \brief Wait for all operations completion in the stream. */ @@ -80,7 +80,7 @@ class CUDADeviceContext : public DeviceContext { cudaStream_t stream() const; private: - GPUPlace place_; + CUDAPlace place_; std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; @@ -143,7 +143,7 @@ class DeviceContextPool { size_t operator()(const platform::Place& place) const { int pre_hash = place.which() + (1 << LEFT_SHIFT); if (platform::is_gpu_place(place)) { - pre_hash += boost::get(place).GetDeviceId(); + pre_hash += boost::get(place).GetDeviceId(); } return hash_(pre_hash); } diff --git a/paddle/platform/device_context_test.cu b/paddle/platform/device_context_test.cu index f046c79e0..186824c01 100644 --- a/paddle/platform/device_context_test.cu +++ b/paddle/platform/device_context_test.cu @@ -20,11 +20,11 @@ limitations under the License. */ TEST(Device, Init) { using paddle::platform::DeviceContext; using paddle::platform::CUDADeviceContext; - using paddle::platform::GPUPlace; + using paddle::platform::CUDAPlace; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); + CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); Eigen::GpuDevice* gpu_device = device_context->eigen_device(); ASSERT_NE(nullptr, gpu_device); delete device_context; @@ -33,11 +33,11 @@ TEST(Device, Init) { TEST(Device, CUDADeviceContext) { using paddle::platform::CUDADeviceContext; - using paddle::platform::GPUPlace; + using paddle::platform::CUDAPlace; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); + CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); Eigen::GpuDevice* gpu_device = device_context->eigen_device(); ASSERT_NE(nullptr, gpu_device); cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); @@ -70,7 +70,7 @@ TEST(Device, DeviceContextPool) { using paddle::platform::CUDADeviceContext; using paddle::platform::Place; using paddle::platform::CPUPlace; - using paddle::platform::GPUPlace; + using paddle::platform::CUDAPlace; DeviceContextPool& pool = DeviceContextPool::Get(); auto cpu_dev_ctx1 = pool.Borrow(CPUPlace()); @@ -80,14 +80,14 @@ TEST(Device, DeviceContextPool) { std::vector gpu_places; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; ++i) { - gpu_places.emplace_back(GPUPlace(i)); + gpu_places.emplace_back(CUDAPlace(i)); } auto dev_ctxs = pool.Borrow(gpu_places); for (size_t i = 0; i < dev_ctxs.size(); ++i) { auto* dev_ctx = static_cast(dev_ctxs[i]); - // check same as GPUPlace(i) - GPUPlace place = boost::get(dev_ctx->GetPlace()); + // check same as CUDAPlace(i) + CUDAPlace place = boost::get(dev_ctx->GetPlace()); EXPECT_EQ(place.GetDeviceId(), static_cast(i)); } } @@ -106,7 +106,7 @@ int main(int argc, char** argv) { places.emplace_back(paddle::platform::CPUPlace()); int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; ++i) { - places.emplace_back(paddle::platform::GPUPlace(i)); + places.emplace_back(paddle::platform::CUDAPlace(i)); } VLOG(0) << " DeviceCount " << count; diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index 6750c8da7..f57c32940 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -50,7 +50,7 @@ struct PerThreadData { T* RecvBuff() { return thrust::raw_pointer_cast(recv_buff.data()); } - PerThreadData(int gpu_id, size_t size) : dev_ctx(GPUPlace(gpu_id)) { + PerThreadData(int gpu_id, size_t size) : dev_ctx(CUDAPlace(gpu_id)) { send_buff.resize(size); for (size_t i = 0; i < size; ++i) { send_buff[i] = static_cast(i); @@ -140,7 +140,7 @@ int main(int argc, char** argv) { places.emplace_back(paddle::platform::CPUPlace()); int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; ++i) { - places.emplace_back(paddle::platform::GPUPlace(i)); + places.emplace_back(paddle::platform::CUDAPlace(i)); } VLOG(0) << " DeviceCount " << count; diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 25fe8d21b..4d23cfd88 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -24,7 +24,9 @@ class PlacePrinter : public boost::static_visitor<> { explicit PlacePrinter(std::ostream &os) : os_(os) {} void operator()(const CPUPlace &) { os_ << "CPUPlace"; } void operator()(const MKLDNNPlace &) { os_ << "MKLDNNPlace"; } - void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } + void operator()(const CUDAPlace &p) { + os_ << "CUDAPlace(" << p.device << ")"; + } private: std::ostream &os_; @@ -37,12 +39,12 @@ static Place the_default_place; void set_place(const Place &place) { the_default_place = place; } const Place &get_place() { return the_default_place; } -const GPUPlace default_gpu() { return GPUPlace(0); } +const CUDAPlace default_gpu() { return CUDAPlace(0); } const CPUPlace default_cpu() { return CPUPlace(); } const MKLDNNPlace default_mkldnn() { return MKLDNNPlace(); } bool is_gpu_place(const Place &p) { - return boost::apply_visitor(IsGPUPlace(), p); + return boost::apply_visitor(IsCUDAPlace(), p); } bool is_cpu_place(const Place &p) { return !is_gpu_place(p) && !is_mkldnn_place(p); diff --git a/paddle/platform/place.h b/paddle/platform/place.h index daeafbbcd..4eab1a396 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -39,43 +39,45 @@ struct MKLDNNPlace { inline bool operator!=(const MKLDNNPlace &) const { return false; } }; -struct GPUPlace { - GPUPlace() : GPUPlace(0) {} - explicit GPUPlace(int d) : device(d) {} +struct CUDAPlace { + CUDAPlace() : CUDAPlace(0) {} + explicit CUDAPlace(int d) : device(d) {} inline int GetDeviceId() const { return device; } // needed for variant equality comparison - inline bool operator==(const GPUPlace &o) const { return device == o.device; } - inline bool operator!=(const GPUPlace &o) const { return !(*this == o); } + inline bool operator==(const CUDAPlace &o) const { + return device == o.device; + } + inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); } int device; }; -struct CUDNNPlace : public GPUPlace { - CUDNNPlace() : GPUPlace() {} - explicit CUDNNPlace(int d) : GPUPlace(d) {} +struct CUDNNPlace : public CUDAPlace { + CUDNNPlace() : CUDAPlace() {} + explicit CUDNNPlace(int d) : CUDAPlace(d) {} }; -struct IsGPUPlace : public boost::static_visitor { +struct IsCUDAPlace : public boost::static_visitor { bool operator()(const CPUPlace &) const { return false; } bool operator()(const MKLDNNPlace &) const { return false; } - bool operator()(const GPUPlace &gpu) const { return true; } + bool operator()(const CUDAPlace &gpu) const { return true; } bool operator()(const CUDNNPlace &) const { return true; } }; struct IsMKLDNNPlace : public boost::static_visitor { bool operator()(const MKLDNNPlace &) const { return true; } bool operator()(const CPUPlace &) const { return false; } - bool operator()(const GPUPlace &) const { return false; } + bool operator()(const CUDAPlace &) const { return false; } bool operator()(const CUDNNPlace &) const { return false; } }; -typedef boost::variant Place; +typedef boost::variant Place; void set_place(const Place &); const Place &get_place(); -const GPUPlace default_gpu(); +const CUDAPlace default_gpu(); const CPUPlace default_cpu(); const MKLDNNPlace default_mkldnn(); diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc index c536b59ed..21f7d9f21 100644 --- a/paddle/platform/place_test.cc +++ b/paddle/platform/place_test.cc @@ -4,7 +4,7 @@ TEST(Place, Equality) { paddle::platform::CPUPlace cpu; - paddle::platform::GPUPlace g0(0), g1(1), gg0(0); + paddle::platform::CUDAPlace g0(0), g1(1), gg0(0); paddle::platform::CUDNNPlace d0(0), d1(1), dd0(0); EXPECT_EQ(cpu, cpu); @@ -41,8 +41,8 @@ TEST(Place, Default) { TEST(Place, Print) { { std::stringstream ss; - ss << paddle::platform::GPUPlace(1); - EXPECT_EQ("GPUPlace(1)", ss.str()); + ss << paddle::platform::CUDAPlace(1); + EXPECT_EQ("CUDAPlace(1)", ss.str()); } { std::stringstream ss; diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu index 464096111..8e2483aa8 100644 --- a/paddle/platform/transform_test.cu +++ b/paddle/platform/transform_test.cu @@ -49,7 +49,7 @@ TEST(Transform, CPUUnary) { TEST(Transform, GPUUnary) { using namespace paddle::platform; using namespace paddle::memory; - GPUPlace gpu0(0); + CUDAPlace gpu0(0); CUDADeviceContext ctx(gpu0); float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); @@ -80,7 +80,7 @@ TEST(Transform, GPUBinary) { using namespace paddle::platform; using namespace paddle::memory; int buf[4] = {1, 2, 3, 4}; - GPUPlace gpu0(0); + CUDAPlace gpu0(0); CUDADeviceContext ctx(gpu0); int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf), ctx.stream()); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index de6b24f70..668a48e81 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -79,7 +79,7 @@ PYBIND11_PLUGIN(core) { self.Resize(make_ddim(dim)); }) .def("alloc_float", - [](Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("alloc_float", @@ -91,7 +91,7 @@ PYBIND11_PLUGIN(core) { self.mutable_data(place); }) .def("alloc_int", - [](Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("set", PyCPUTensorSetFromArray) @@ -310,10 +310,10 @@ All parameter, weight, gradient are variables in Paddle. return new paddle::platform::CPUDeviceContext(); }) .def_static("create", - [](paddle::platform::GPUPlace& place) + [](paddle::platform::CUDAPlace& place) -> paddle::platform::DeviceContext* { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("GPUPlace is not supported in CPU device."); + PADDLE_THROW("CUDAPlace is not supported in CPU device."); #else return new paddle::platform::CUDADeviceContext(place); #endif @@ -323,9 +323,9 @@ All parameter, weight, gradient are variables in Paddle. #ifdef PADDLE_WITH_CUDA py::class_(m, "Communicator").def(py::init<>()); #endif - py::class_(m, "GPUPlace") + py::class_(m, "CUDAPlace") .def(py::init()) - .def("__str__", string::to_string); + .def("__str__", string::to_string); py::class_(m, "CPUPlace") .def(py::init<>()) @@ -338,7 +338,7 @@ All parameter, weight, gradient are variables in Paddle. self = cpu_place; }) .def("set_place", - [](platform::Place &self, const platform::GPUPlace &gpu_place) { + [](platform::Place &self, const platform::CUDAPlace &gpu_place) { self = gpu_place; }); @@ -363,7 +363,7 @@ All parameter, weight, gradient are variables in Paddle. const platform::CPUPlace &place) { self.Run(scope, place); }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::GPUPlace &place) { self.Run(scope, place); }) + const platform::CUDAPlace &place) { self.Run(scope, place); }) .def("type", [](const OperatorBase &op) -> std::string { return op.Type(); }) .def("outputs", diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 413fd9b04..7b8c29ff8 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -71,7 +71,7 @@ struct CastToPyBufferImpl { dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(), cudaMemcpyDeviceToHost, dev_ctx->stream()); #else - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); #endif } else if (paddle::platform::is_cpu_place(tensor.place())) { dst_tensor = tensor; @@ -127,7 +127,7 @@ template void PyCUDATensorSetFromArray( framework::Tensor &self, py::array_t array, - paddle::platform::GPUPlace &place) { + paddle::platform::CUDAPlace &place) { std::vector dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 7ba1bf095..108ff335b 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -36,7 +36,7 @@ int main(int argc, char** argv) { paddle::memory::Used(paddle::platform::CPUPlace()); std::vector devs = {"CPU"}; #ifdef PADDLE_WITH_CUDA - paddle::memory::Used(paddle::platform::GPUPlace(0)); + paddle::memory::Used(paddle::platform::CUDAPlace(0)); devs.push_back("GPU:0"); #endif paddle::framework::InitDevices(devs); diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 051b9094a..c72b57306 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -15,14 +15,14 @@ import backward import regularizer from param_attr import ParamAttr from data_feeder import DataFeeder -from core import LoDTensor, CPUPlace, GPUPlace +from core import LoDTensor, CPUPlace, CUDAPlace from distribute_transpiler import DistributeTranspiler import clip Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', - 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr' + 'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr' 'DataFeeder', 'clip', 'DistributeTranspiler' ] diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index cdd576294..2c91afb36 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -47,7 +47,7 @@ class Executor(object): act_places.append(p) # TODO(dzhwinter) : consider that our fluid tests all written in - # GPUPlace(gpu_id), this will be changed in the future + # CUDAPlace(gpu_id), this will be changed in the future if core.is_compile_gpu(): core.init_devices(["CPU", "GPU:0"]) else: diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index b0c11ba34..e3cc2a893 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -142,7 +142,7 @@ def main(): opts = sgd_optimizer.minimize(cost) if USE_GPU: - place = core.GPUPlace(0) + place = core.CUDAPlace(0) else: place = core.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index 087283bfd..8dbfbd547 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -316,7 +316,7 @@ class OpTest(unittest.TestCase): def check_output(self, atol=1e-5): places = [core.CPUPlace()] if core.is_compile_gpu() and core.op_support_gpu(self.op_type): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_output_with_place(place, atol) @@ -379,7 +379,7 @@ class OpTest(unittest.TestCase): "Gradient Check On %s" % str(cpu_place)) if core.is_compile_gpu() and self.op.support_gpu(): - gpu_place = core.GPUPlace(0) + gpu_place = core.CUDAPlace(0) gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place, output_names, no_grad_set) diff --git a/python/paddle/v2/fluid/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py index 1ff393216..7b2d02fbf 100644 --- a/python/paddle/v2/fluid/tests/test_adagrad_op.py +++ b/python/paddle/v2/fluid/tests/test_adagrad_op.py @@ -167,7 +167,7 @@ class TestSparseAdagradOp(unittest.TestCase): def test_sparse_adagrad(self): places = [core.CPUPlace()] if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_with_place(place) diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py index dfc047e1f..abbd48d2b 100644 --- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py +++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py @@ -304,7 +304,7 @@ class TestBatchNormOp(OpTest): self.__assert_close(saved_variance_tensor, saved_variance, "saved_variance") self.__assert_close(mean_out_tensor, mean_out, "mean_out") - if isinstance(place, core.GPUPlace): + if isinstance(place, core.CUDAPlace): atol = 5e-2 else: atol = 1e-4 @@ -339,7 +339,7 @@ class TestBatchNormOp(OpTest): places = [core.CPUPlace()] if core.is_compile_gpu() and core.op_support_gpu("batch_norm"): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) core.init_devices(["CPU", "GPU:0"]) else: diff --git a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py index 4afe0c6a6..6f6a60ccb 100644 --- a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py @@ -20,7 +20,7 @@ class TestGaussianRandomOp(unittest.TestCase): def test_gpu(self): if core.is_compile_gpu(): - self.gaussian_random_test(place=fluid.GPUPlace(0)) + self.gaussian_random_test(place=fluid.CUDAPlace(0)) def gaussian_random_test(self, place): diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py index d01e25744..e3f3ac58e 100644 --- a/python/paddle/v2/fluid/tests/test_profiler.py +++ b/python/paddle/v2/fluid/tests/test_profiler.py @@ -15,7 +15,7 @@ class TestProfiler(unittest.TestCase): data = layers.data(name='data', shape=[3, 28, 28], dtype='float32') conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) - place = fluid.GPUPlace(0) + place = fluid.CUDAPlace(0) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) diff --git a/python/paddle/v2/fluid/tests/test_sgd_op.py b/python/paddle/v2/fluid/tests/test_sgd_op.py index 9c345792b..14d41e172 100644 --- a/python/paddle/v2/fluid/tests/test_sgd_op.py +++ b/python/paddle/v2/fluid/tests/test_sgd_op.py @@ -78,7 +78,7 @@ class TestSparseSGDOp(unittest.TestCase): def test_sparse_sgd(self): places = [core.CPUPlace()] if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_with_place(place) diff --git a/python/paddle/v2/fluid/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py index d6872c8ba..dbe4d6bcd 100644 --- a/python/paddle/v2/fluid/tests/test_uniform_random_op.py +++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py @@ -23,7 +23,7 @@ class TestUniformRandomOp(unittest.TestCase): def test_gpu(self): if core.is_compile_gpu(): - self.uniform_random_test(place=core.GPUPlace(0)) + self.uniform_random_test(place=core.CUDAPlace(0)) def uniform_random_test(self, place): program = fluid.Program() -- GitLab