From a1fc570197dd9cacaf5dc6941a47bab5fc2d4fba Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Wed, 7 Feb 2018 23:45:16 -0800 Subject: [PATCH] Fix for program crash when destructor is called before channel close with blocked readers/writers (#8197) * Fix destructor crash and add unit tests * Fix typo in unit test * Reword comments * Make close channel a generic test * Refactoring unit tests * Fix method name --- paddle/framework/channel_test.cc | 181 +++++++++++++++++- paddle/framework/details/buffered_channel.h | 22 ++- paddle/framework/details/unbuffered_channel.h | 19 +- 3 files changed, 213 insertions(+), 9 deletions(-) diff --git a/paddle/framework/channel_test.cc b/paddle/framework/channel_test.cc index df9e15e22b8..a307abb4ed3 100644 --- a/paddle/framework/channel_test.cc +++ b/paddle/framework/channel_test.cc @@ -22,6 +22,8 @@ limitations under the License. */ using paddle::framework::Channel; using paddle::framework::MakeChannel; using paddle::framework::CloseChannel; +using paddle::framework::details::Buffered; +using paddle::framework::details::UnBuffered; TEST(Channel, MakeAndClose) { using paddle::framework::details::Buffered; @@ -60,13 +62,54 @@ TEST(Channel, SufficientBufferSizeDoesntBlock) { delete ch; } -TEST(Channel, SendOnClosedChannelPanics) { - const size_t buffer_size = 10; - auto ch = MakeChannel(buffer_size); - size_t i = 5; - EXPECT_EQ(ch->Send(&i), true); // should not block or panic +// This tests that a channel must return false +// on send and receive performed after closing the channel. +// Receive will only return false after close when queue is empty. +// By creating separate threads for sending and receiving, we make this +// function able to test both buffered and unbuffered channels. +void SendReceiveWithACloseChannelShouldPanic(Channel *ch) { + const size_t data = 5; + std::thread send_thread{[&]() { + size_t i = data; + EXPECT_EQ(ch->Send(&i), true); // should not block + }}; + + std::thread recv_thread{[&]() { + size_t i; + EXPECT_EQ(ch->Receive(&i), true); // should not block + EXPECT_EQ(i, data); + }}; + + send_thread.join(); + recv_thread.join(); + + // After closing send should return false. Receive should + // also return false as there is no data in queue. CloseChannel(ch); - EXPECT_EQ(ch->Send(&i), false); // should panic + send_thread = std::thread{[&]() { + size_t i = data; + EXPECT_EQ(ch->Send(&i), false); // should return false + }}; + recv_thread = std::thread{[&]() { + size_t i; + // should return false because channel is closed and queue is empty + EXPECT_EQ(ch->Receive(&i), false); + }}; + + send_thread.join(); + recv_thread.join(); +} + +TEST(Channel, SendReceiveClosedBufferedChannelPanics) { + size_t buffer_size = 10; + auto ch = MakeChannel(buffer_size); + SendReceiveWithACloseChannelShouldPanic(ch); + delete ch; +} + +TEST(Channel, SendReceiveClosedUnBufferedChannelPanics) { + auto ch = MakeChannel(0); + SendReceiveWithACloseChannelShouldPanic(ch); delete ch; } @@ -381,3 +424,129 @@ TEST(Channel, UnbufferedMoreReceiveLessSendTest) { EXPECT_EQ(sum_receive, 28U); delete ch; } + +// This tests that destroying a channel unblocks +// any senders waiting for channel to have write space +void ChannelDestroyUnblockSenders(Channel *ch) { + size_t num_threads = 5; + std::thread t[num_threads]; + bool thread_ended[num_threads]; + bool send_success[num_threads]; + + // Launches threads that try to write and are blocked because of no readers + for (size_t i = 0; i < num_threads; i++) { + thread_ended[i] = false; + send_success[i] = false; + t[i] = std::thread( + [&](bool *ended, bool *success) { + int data = 10; + *success = ch->Send(&data); + *ended = true; + }, + &thread_ended[i], &send_success[i]); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); // wait 0.5 sec + bool is_buffered_channel = false; + if (dynamic_cast *>(ch)) is_buffered_channel = true; + + if (is_buffered_channel) { + // If channel is buffered, verify that atleast 4 threads are blocked + int ct = 0; + for (size_t i = 0; i < num_threads; i++) { + if (thread_ended[i] == false) ct++; + } + // Atleast 4 threads must be blocked + EXPECT_GE(ct, 4); + } else { + // Verify that all the threads are blocked + for (size_t i = 0; i < num_threads; i++) { + EXPECT_EQ(thread_ended[i], false); + } + } + // Explicitly destroy the channel + delete ch; + std::this_thread::sleep_for(std::chrono::milliseconds(200)); // wait + + // Verify that all threads got unblocked + for (size_t i = 0; i < num_threads; i++) { + EXPECT_EQ(thread_ended[i], true); + } + + // Count number of successfuld sends + int ct = 0; + for (size_t i = 0; i < num_threads; i++) { + if (send_success[i]) ct++; + } + + if (is_buffered_channel) { + // Only 1 send must be successful + EXPECT_EQ(ct, 1); + } else { + // In unbuffered channel, no send should be successful + EXPECT_EQ(ct, 0); + } + + // Join all threads + for (size_t i = 0; i < num_threads; i++) t[i].join(); +} + +// This tests that destroying a channel also unblocks +// any receivers waiting on the channel +void ChannelDestroyUnblockReceivers(Channel *ch) { + size_t num_threads = 5; + std::thread t[num_threads]; + bool thread_ended[num_threads]; + + // Launches threads that try to read and are blocked because of no writers + for (size_t i = 0; i < num_threads; i++) { + thread_ended[i] = false; + t[i] = std::thread( + [&](bool *p) { + int data; + // All reads should return false + EXPECT_EQ(ch->Receive(&data), false); + *p = true; + }, + &thread_ended[i]); + } + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait + + // Verify that all threads are blocked + for (size_t i = 0; i < num_threads; i++) { + EXPECT_EQ(thread_ended[i], false); + } + // delete the channel + delete ch; + std::this_thread::sleep_for(std::chrono::milliseconds(200)); // wait + // Verify that all threads got unblocked + for (size_t i = 0; i < num_threads; i++) { + EXPECT_EQ(thread_ended[i], true); + } + + for (size_t i = 0; i < num_threads; i++) t[i].join(); +} + +TEST(Channel, BufferedChannelDestroyUnblocksReceiversTest) { + size_t buffer_size = 1; + auto ch = MakeChannel(buffer_size); + ChannelDestroyUnblockReceivers(ch); +} + +TEST(Channel, BufferedChannelDestroyUnblocksSendersTest) { + size_t buffer_size = 1; + auto ch = MakeChannel(buffer_size); + ChannelDestroyUnblockSenders(ch); +} + +// This tests that destroying an unbuffered channel also unblocks +// unblocks any receivers waiting for senders +TEST(Channel, UnbufferedChannelDestroyUnblocksReceiversTest) { + auto ch = MakeChannel(0); + ChannelDestroyUnblockReceivers(ch); +} + +TEST(Channel, UnbufferedChannelDestroyUnblocksSendersTest) { + auto ch = MakeChannel(0); + ChannelDestroyUnblockSenders(ch); +} diff --git a/paddle/framework/details/buffered_channel.h b/paddle/framework/details/buffered_channel.h index 00b63da4da7..77eebc99249 100644 --- a/paddle/framework/details/buffered_channel.h +++ b/paddle/framework/details/buffered_channel.h @@ -42,8 +42,11 @@ class Buffered : public paddle::framework::Channel { std::mutex mu_; std::condition_variable empty_cond_var_; std::condition_variable full_cond_var_; + std::condition_variable destructor_cond_var_; std::deque channel_; std::atomic closed_{false}; + std::atomic send_ctr{0}; + std::atomic recv_ctr{0}; Buffered(size_t cap) : cap_(cap), closed_(false) { PADDLE_ENFORCE_GT(cap, 0); @@ -58,6 +61,7 @@ bool Buffered::Send(T* item) { if (closed_) { return ret; } + send_ctr++; std::unique_lock lock(mu_); full_cond_var_.wait(lock, [this]() { return channel_.size() < cap_ || closed_; }); @@ -67,20 +71,30 @@ bool Buffered::Send(T* item) { empty_cond_var_.notify_one(); ret = true; } + send_ctr--; + destructor_cond_var_.notify_one(); return ret; } template bool Buffered::Receive(T* item) { + bool ret = false; + // Once the channel has been closed and all data has been consumed, + // just return false. Don't even try acquiring the mutex. + if (closed_ && channel_.empty()) { + return false; + } + recv_ctr++; std::unique_lock lock(mu_); empty_cond_var_.wait(lock, [this]() { return !channel_.empty() || closed_; }); - bool ret = false; if (!channel_.empty()) { *item = std::move(channel_.front()); channel_.pop_front(); full_cond_var_.notify_one(); ret = true; } + recv_ctr--; + destructor_cond_var_.notify_one(); return ret; } @@ -100,6 +114,12 @@ Buffered::~Buffered() { closed_ = true; channel_.clear(); NotifyAllParticipants(&lock); + + // The destructor must wait for all readers and writers to complete their task + // The channel has been closed, so we will not accept new readers and writers + lock.lock(); + destructor_cond_var_.wait( + lock, [this]() { return send_ctr == 0 && recv_ctr == 0; }); } template diff --git a/paddle/framework/details/unbuffered_channel.h b/paddle/framework/details/unbuffered_channel.h index 815cebad2d8..92a16b4d22b 100644 --- a/paddle/framework/details/unbuffered_channel.h +++ b/paddle/framework/details/unbuffered_channel.h @@ -45,9 +45,11 @@ class UnBuffered : public paddle::framework::Channel { // A transaction occurs only when both are true std::atomic reader_found_{false}, writer_found_{false}; std::condition_variable cv_channel_; - std::condition_variable_any cv_reader_, cv_writer_; + std::condition_variable_any cv_reader_, cv_writer_, cv_destructor_; T* item{nullptr}; std::atomic closed_{false}; + std::atomic send_ctr{0}; + std::atomic recv_ctr{0}; UnBuffered() : closed_(false) {} @@ -62,6 +64,7 @@ bool UnBuffered::Send(T* data) { if (closed_) { return ret; } + send_ctr++; // Prevent other writers from entering std::unique_lock writer_lock(mu_write_); writer_found_ = true; @@ -81,6 +84,8 @@ bool UnBuffered::Send(T* data) { ret = true; } writer_found_ = false; + send_ctr--; + cv_destructor_.notify_one(); return ret; } @@ -88,6 +93,12 @@ bool UnBuffered::Send(T* data) { // data that was sent by a writer is read from a reader. template bool UnBuffered::Receive(T* data) { + bool ret = false; + // If channel is closed, we don't even want any reader to enter. + // Unlike a buffered channel, an unbuffered channel does not allow + // readers to read after closing because there is no buffer to be consumed. + if (closed_) return ret; + recv_ctr++; // Prevent other readers from entering std::unique_lock read_lock{mu_read_}; reader_found_ = true; @@ -96,7 +107,6 @@ bool UnBuffered::Receive(T* data) { cv_reader_.wait(cv_lock, [this]() { return writer_found_ == true || closed_; }); cv_writer_.notify_one(); - bool ret = false; if (!closed_) { std::unique_lock lock_ch{mu_ch_}; // Reader should wait for the writer to first write its data @@ -110,6 +120,8 @@ bool UnBuffered::Receive(T* data) { cv_channel_.notify_one(); } reader_found_ = false; + recv_ctr--; + cv_destructor_.notify_one(); return ret; } @@ -135,6 +147,9 @@ UnBuffered::~UnBuffered() { item = nullptr; closed_ = true; NotifyAllParticipants(&lock); + lock.lock(); + cv_destructor_.wait(lock, + [this]() { return send_ctr == 0 && recv_ctr == 0; }); } // This function notifies all the readers, writers and -- GitLab