Unverified commit 80c0cc97, authored by wanghuancoder, committed by GitHub

modify gc logic, use new device_event (#35208)

* modify gc logic, use new device_event, test=develop

* use GenerateDeviceEventFlag, test=develop

* refine, test=develop

* fix test_standalone_executor.py, test=develop

* refine, test=develop
Parent: b6adfd97
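In short, the commit gives each instruction its own `platform::DeviceEvent` created with `platform::GenerateDeviceEventFlag()`; when garbage is queued, the event is recorded on the instruction's device context and a background task polls `Query()` before actually freeing anything. The snippet below is a self-contained sketch of that hand-off pattern, not Paddle code: `FakeDeviceEvent`, the `std::thread` worker, and the integer "garbage" are stand-ins for `platform::DeviceEvent`, the interpreter's `gc_queue_`, and the tensor garbage queue seen in the diff.

```cpp
// Self-contained sketch of the GC pattern this commit introduces (stand-ins,
// not the Paddle API): record an event once the instruction's work has been
// issued, hand the garbage to a background task, and delete it only after the
// event reports that the device work is finished.
#include <atomic>
#include <deque>
#include <memory>
#include <thread>

using GarbageQueue = std::deque<std::shared_ptr<int>>;  // stand-in for tensor garbage

struct FakeDeviceEvent {                    // stand-in for platform::DeviceEvent
  std::atomic<bool> finished{false};
  void Record() {}                          // real code: Record(dev_ctx)
  void SetFininshed() { finished = true; }  // real code: only for the CPU event
  bool Query() const { return finished.load(); }
};

int main() {
  FakeDeviceEvent event;  // real code: one event per instruction,
                          // built with GenerateDeviceEventFlag()
  auto* garbages = new GarbageQueue();
  garbages->push_back(std::make_shared<int>(42));

  event.Record();
  event.SetFininshed();

  // Mirrors gc_queue_->AddTask(...): wait for the event, then free the garbage.
  std::thread gc_worker([garbages, &event]() {
    while (!event.Query()) {
      std::this_thread::yield();  // the diff uses sched_yield() / SleepEx(50, FALSE)
    }
    delete garbages;
  });

  gc_worker.join();
  return 0;
}
```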
@@ -11,6 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+#if !defined(_WIN32)
+#include <sched.h>
+#else
+#define NOMINMAX
+#include <windows.h>
+#endif  // !_WIN32
#include "paddle/fluid/framework/new_executor/interpretercore.h"
#include <unordered_set>
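The platform-conditional includes added in the hunk above exist only to support the yield/sleep call inside the GC polling loop further down in this diff. A hypothetical helper (not part of the patch) that isolates that pattern:

```cpp
// Hypothetical helper, not in the patch: shows what the headers added above
// are for. While the GC task polls DeviceEvent::Query(), it gives up the CPU
// via sched_yield() on POSIX or sleeps briefly via SleepEx() on Windows.
#if !defined(_WIN32)
#include <sched.h>
#else
#define NOMINMAX  // keep windows.h from defining min/max macros
#include <windows.h>
#endif  // !_WIN32

inline void YieldWhilePolling() {
#if defined(_WIN32)
  SleepEx(50, FALSE);  // sleep ~50 ms, non-alertable
#else
  sched_yield();       // yield the remainder of the time slice
#endif
}
```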
@@ -255,10 +263,7 @@ void InterpreterCore::Convert() {
  }

  for (size_t i = 0; i < vec_instruction_.size(); ++i) {
-    // int device_type = static_cast<int>(paddle::platform::DeviceType::CUDA);
-    // paddle::platform::DeviceOption dev_opt(
-    //     device_type, BOOST_GET_CONST(platform::CUDAPlace, place_).device);
-    gc_event_.emplace_back(place_);
+    gc_event_.emplace_back(place_, platform::GenerateDeviceEventFlag());

    std::vector<size_t> vec_temp;
    for (auto& item : vec_instruction_[i].output_index_) {
@@ -450,41 +455,40 @@ void InterpreterCore::CheckGC(size_t instr_id,
  if (!garbages_->empty()) {
    if (max_memory_size_ <= 1) {
#if defined(PADDLE_WITH_CUDA)
-      auto* dev_ctx = reinterpret_cast<platform::CUDADeviceContext*>(
-          platform::DeviceContextPool::Instance().Get(place));
-      gc_event_[instr_id].Record(dev_ctx);
+      gc_event_[instr_id].Record(
+          platform::DeviceContextPool::Instance().Get(place));
      gc_event_[instr_id].SetFininshed();  // Only for CPU Event
      gc_queue_->AddTask(
          [ container = garbages_.release(), event = &gc_event_[instr_id] ]() {
            while (!event->Query()) {
+#if defined(_WIN32)
+              SleepEx(50, FALSE);
+#else
+              sched_yield();
+#endif
              continue;
            }
            delete container;
          });
      garbages_.reset(new GarbageQueue());
#else
      delete garbages_.release();
      garbages_.reset(new GarbageQueue());
#endif
    } else if (cur_memory_size_ >= max_memory_size_) {
#if defined(PADDLE_WITH_CUDA)
-      auto* dev_ctx = reinterpret_cast<platform::CUDADeviceContext*>(
-          platform::DeviceContextPool::Instance().Get(place));
-      gc_event_[instr_id].Record(dev_ctx);
+      gc_event_[instr_id].Record(
+          platform::DeviceContextPool::Instance().Get(place));
      gc_event_[instr_id].SetFininshed();  // Only for CPU Event
      gc_queue_->AddTask(
          [ container = garbages_.release(), event = &gc_event_[instr_id] ]() {
            while (!event->Query()) {
+#if defined(_WIN32)
+              SleepEx(50, FALSE);
+#else
+              sched_yield();
+#endif
              continue;
            }
            delete container;
          });
      garbages_.reset(new GarbageQueue());
      cur_memory_size_ = 0;
#else
      delete garbages_.release();
      garbages_.reset(new GarbageQueue());
      cur_memory_size_ = 0;
#endif
    }
  }
}