diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc
index 971db8b37d8409185a4e81d1e77b0fc53534e9f5..26ff221dfa0768bd2bcc9e6485a32485f0212ac6 100644
--- a/paddle/fluid/operators/reader/buffered_reader.cc
+++ b/paddle/fluid/operators/reader/buffered_reader.cc
@@ -14,7 +14,6 @@
 
 #include "paddle/fluid/operators/reader/buffered_reader.h"
 #include <vector>
-#include "paddle/fluid/framework/data_type.h"
 
 namespace paddle {
 namespace operators {
@@ -25,12 +24,6 @@ BufferedReader::~BufferedReader() {
     position_.front().wait();
     position_.pop();
   }
-#ifdef PADDLE_WITH_CUDA
-  if (platform::is_gpu_place(place_)) {
-    platform::SetDeviceId(boost::get<platform::CUDAPlace>(place_).device);
-    PADDLE_ENFORCE(cudaStreamDestroy(stream));
-  }
-#endif
 }
 
 BufferedReader::BufferedReader(
@@ -40,12 +33,6 @@ BufferedReader::BufferedReader(
       thread_pool_(1),
       place_(place),
       buffer_size_(buffer_size) {
-#ifdef PADDLE_WITH_CUDA
-  if (platform::is_gpu_place(place_)) {
-    platform::SetDeviceId(boost::get<platform::CUDAPlace>(place_).device);
-    PADDLE_ENFORCE(cudaStreamCreate(&stream));
-  }
-#endif
   cpu_buffer_.resize(buffer_size);
   gpu_buffer_.resize(buffer_size);
   ReadTillBufferFullAsync();
@@ -67,39 +54,14 @@ void BufferedReader::ReadAsync(size_t i) {
       return -1UL;
     }
 
-#ifdef PADDLE_WITH_CUDA
-    // NOTE(liangdun): using async copy instead of TensorCopySync
-    // TensorCopySync would block other stream
     if (platform::is_gpu_place(place_)) {
       TensorVec &gpu = gpu_buffer_[i];
       gpu.resize(cpu.size());
       for (size_t i = 0; i < cpu.size(); ++i) {
-        gpu[i].Resize(cpu[i].dims());
-        gpu[i].set_layout(cpu[i].layout());
-        auto cpu_place = cpu[i].place();
-        auto cpu_ptr = cpu[i].data<void>();
-        auto gpu_ptr = gpu[i].mutable_data(place_, cpu[i].type());
-        auto size =
-            cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type());
-        if (platform::is_cuda_pinned_place(cpu_place))
-          memory::Copy(boost::get<platform::CUDAPlace>(place_), gpu_ptr,
-                       boost::get<platform::CUDAPinnedPlace>(cpu_place),
-                       cpu_ptr, size, stream);
-        else if ((platform::is_gpu_place(cpu_place)))
-          memory::Copy(boost::get<platform::CUDAPlace>(place_), gpu_ptr,
-                       boost::get<platform::CUDAPlace>(cpu_place), cpu_ptr,
-                       size, stream);
-        else
-          // if cpu place is not pinned, async copy is slower than sync copy,
-          // so we use sync copy instead.
-          memory::Copy(boost::get<platform::CUDAPlace>(place_), gpu_ptr,
-                       boost::get<platform::CPUPlace>(cpu_place), cpu_ptr, size,
-                       0);
+        framework::TensorCopySync(cpu[i], place_, &gpu[i]);
         gpu[i].set_lod(cpu[i].lod());
       }
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream));
     }
-#endif
     return i;
   }));
 }
diff --git a/paddle/fluid/operators/reader/buffered_reader.h b/paddle/fluid/operators/reader/buffered_reader.h
index e55572177ccd7cd18695bdecc4c65a25ffd6b5d4..cbe2bc1b5fdd69d1a843b768e3289acd621369a6 100644
--- a/paddle/fluid/operators/reader/buffered_reader.h
+++ b/paddle/fluid/operators/reader/buffered_reader.h
@@ -19,9 +19,6 @@
 #include <vector>
 #include "ThreadPool.h"
 #include "paddle/fluid/framework/reader.h"
-#ifdef PADDLE_WITH_CUDA
-#include "paddle/fluid/platform/gpu_info.h"
-#endif
 
 namespace paddle {
 namespace operators {
@@ -62,9 +59,6 @@ class BufferedReader : public framework::DecoratedReader {
   std::vector<TensorVec> cpu_buffer_;
   std::vector<TensorVec> gpu_buffer_;
   size_t prev_pos_{-1UL};
-#ifdef PADDLE_WITH_CUDA
-  cudaStream_t stream;
-#endif
 };
 
 }  // namespace reader
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 2cd4e328b2f334d1230f8573908791f58f74259e..1762bd3e343e8af6768dd23f8fbc58cd0182d3c9 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -484,7 +484,7 @@ def _py_reader(capacity,
                name=None,
                use_double_buffer=True,
                feed_list=None):
-    use_cuda_pinned_place = use_double_buffer and core.is_compiled_with_cuda()
+
     if feed_list is not None:
         if not isinstance(feed_list, list):
            raise TypeError("feed_list should be a list of Variable"
@@ -565,10 +565,7 @@ def _py_reader(capacity,
                 for item in tensors:
                     if not isinstance(item, core.LoDTensor):
                         tmp = core.LoDTensor()
-                        if use_cuda_pinned_place:
-                            tmp.set(item, core.CUDAPinnedPlace())
-                        else:
-                            tmp.set(item, core.CPUPlace())
+                        tmp.set(item, core.CPUPlace())
                         item = tmp
 
                     array.append(item)
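The removed NOTE explains the original motivation: issuing the H2D copy asynchronously on a private stream avoids blocking other streams, but that only pays off when the source buffer is pinned, which is also why the io.py change drops core.CUDAPinnedPlace() once the async path is gone. As background, here is a minimal, self-contained CUDA sketch (not Paddle code; buffer names and sizes are invented for illustration) showing the behavior the removed comment refers to: cudaMemcpyAsync can overlap with other work only from page-locked host memory, while from pageable memory the runtime stages the transfer and the call is effectively synchronous, so a plain cudaMemcpy is no slower and simpler.

```cpp
// Standalone sketch, compiled with nvcc; illustrates pinned vs. pageable
// host-to-device copies, not BufferedReader itself.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

#define CHECK(expr)                                               \
  do {                                                            \
    cudaError_t err__ = (expr);                                   \
    if (err__ != cudaSuccess) {                                   \
      std::fprintf(stderr, "%s failed: %s\n", #expr,              \
                   cudaGetErrorString(err__));                    \
      std::exit(1);                                               \
    }                                                             \
  } while (0)

int main() {
  const size_t n = 1 << 20;
  const size_t bytes = n * sizeof(float);

  void *d_dst = nullptr;
  CHECK(cudaMalloc(&d_dst, bytes));

  cudaStream_t stream;
  CHECK(cudaStreamCreate(&stream));

  // Pinned (page-locked) host buffer: the async copy is queued on the
  // private stream and can overlap with work on other streams.
  void *h_pinned = nullptr;
  CHECK(cudaMallocHost(&h_pinned, bytes));
  CHECK(cudaMemcpyAsync(d_dst, h_pinned, bytes, cudaMemcpyHostToDevice,
                        stream));
  CHECK(cudaStreamSynchronize(stream));

  // Pageable host buffer: the async API gains nothing here, so a blocking
  // copy is used instead (the "use sync copy instead" branch the diff removes).
  std::vector<float> h_pageable(n, 1.0f);
  CHECK(cudaMemcpy(d_dst, h_pageable.data(), bytes, cudaMemcpyHostToDevice));

  CHECK(cudaFreeHost(h_pinned));
  CHECK(cudaFree(d_dst));
  CHECK(cudaStreamDestroy(stream));
  std::printf("done\n");
  return 0;
}
```

With the private stream and the place-specific memory::Copy branches removed, the reader simply delegates each host-to-device transfer to framework::TensorCopySync, trading potential overlap for simpler, easier-to-reason-about copy semantics.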