未验证 提交 80c0cc97 编写于 作者: W wanghuancoder 提交者: GitHub

Modify GC logic to use the new device_event (#35208)

* Modify GC logic to use the new device_event, test=develop

* use GenerateDeviceEventFlag, test=develop

* refine, test=develop

* fix test_standalone_executor.py, test=develop

* refine, test=develop
上级 b6adfd97
...@@ -11,6 +11,14 @@ ...@@ -11,6 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#if !defined(_WIN32)
#include <sched.h>
#else
#define NOMINMAX
#include <windows.h>
#endif // !_WIN32
#include "paddle/fluid/framework/new_executor/interpretercore.h" #include "paddle/fluid/framework/new_executor/interpretercore.h"
#include <unordered_set> #include <unordered_set>
...@@ -255,10 +263,7 @@ void InterpreterCore::Convert() { ...@@ -255,10 +263,7 @@ void InterpreterCore::Convert() {
} }
for (size_t i = 0; i < vec_instruction_.size(); ++i) { for (size_t i = 0; i < vec_instruction_.size(); ++i) {
// int device_type = static_cast<int>(paddle::platform::DeviceType::CUDA); gc_event_.emplace_back(place_, platform::GenerateDeviceEventFlag());
// paddle::platform::DeviceOption dev_opt(
// device_type, BOOST_GET_CONST(platform::CUDAPlace, place_).device);
gc_event_.emplace_back(place_);
std::vector<size_t> vec_temp; std::vector<size_t> vec_temp;
for (auto& item : vec_instruction_[i].output_index_) { for (auto& item : vec_instruction_[i].output_index_) {
...@@ -450,41 +455,40 @@ void InterpreterCore::CheckGC(size_t instr_id, ...@@ -450,41 +455,40 @@ void InterpreterCore::CheckGC(size_t instr_id,
if (!garbages_->empty()) { if (!garbages_->empty()) {
if (max_memory_size_ <= 1) { if (max_memory_size_ <= 1) {
#if defined(PADDLE_WITH_CUDA) gc_event_[instr_id].Record(
auto* dev_ctx = reinterpret_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(place)); platform::DeviceContextPool::Instance().Get(place));
gc_event_[instr_id].Record(dev_ctx); gc_event_[instr_id].SetFininshed(); // Only for CPU Event
gc_queue_->AddTask( gc_queue_->AddTask(
[ container = garbages_.release(), event = &gc_event_[instr_id] ]() { [ container = garbages_.release(), event = &gc_event_[instr_id] ]() {
while (!event->Query()) { while (!event->Query()) {
#if defined(_WIN32)
SleepEx(50, FALSE);
#else
sched_yield();
#endif
continue; continue;
} }
delete container; delete container;
}); });
garbages_.reset(new GarbageQueue()); garbages_.reset(new GarbageQueue());
#else
delete garbages_.release();
garbages_.reset(new GarbageQueue());
#endif
} else if (cur_memory_size_ >= max_memory_size_) { } else if (cur_memory_size_ >= max_memory_size_) {
#if defined(PADDLE_WITH_CUDA) gc_event_[instr_id].Record(
auto* dev_ctx = reinterpret_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(place)); platform::DeviceContextPool::Instance().Get(place));
gc_event_[instr_id].Record(dev_ctx); gc_event_[instr_id].SetFininshed(); // Only for CPU Event
gc_queue_->AddTask( gc_queue_->AddTask(
[ container = garbages_.release(), event = &gc_event_[instr_id] ]() { [ container = garbages_.release(), event = &gc_event_[instr_id] ]() {
while (!event->Query()) { while (!event->Query()) {
#if defined(_WIN32)
SleepEx(50, FALSE);
#else
sched_yield();
#endif
continue; continue;
} }
delete container; delete container;
}); });
garbages_.reset(new GarbageQueue()); garbages_.reset(new GarbageQueue());
cur_memory_size_ = 0; cur_memory_size_ = 0;
#else
delete garbages_.release();
garbages_.reset(new GarbageQueue());
cur_memory_size_ = 0;
#endif
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册