Commit 1533bf12 authored by: Yu Yang

Use event and single thread

Parent 176277b8
@@ -245,7 +245,7 @@ struct FetchOpHandle : public OpHandle {
 class ParallelExecutorPrivate {
  public:
-  explicit ParallelExecutorPrivate(size_t num_threads = 12)
+  explicit ParallelExecutorPrivate(size_t num_threads = 0)
       : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {}

   std::vector<platform::Place> places_;
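
Note: with the default changed to 0, the initializer `pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads))` leaves `pool_` null, which, given the commit title "Use event and single thread", presumably means operators run inline on the calling thread instead of being dispatched to a pool. A minimal sketch of that null-pool dispatch pattern, with all names (`TinyPool`, `Runner`, `RunOp`) purely illustrative and not taken from the ParallelExecutor code:

```cpp
#include <cstdio>
#include <functional>
#include <future>
#include <memory>
#include <vector>

// Stand-in pool: runs each task on its own std::async thread.
struct TinyPool {
  std::vector<std::future<void>> futures;
  void Enqueue(std::function<void()> fn) {
    futures.push_back(std::async(std::launch::async, std::move(fn)));
  }
  ~TinyPool() { for (auto &f : futures) f.wait(); }
};

struct Runner {
  explicit Runner(size_t num_threads = 0)
      : pool_(num_threads == 0 ? nullptr : new TinyPool) {}

  void RunOp(const std::function<void()> &op) {
    if (pool_ == nullptr) {
      op();                // single-thread mode: run inline, in issue order
    } else {
      pool_->Enqueue(op);  // multi-thread mode: hand off to the pool
    }
  }

  std::unique_ptr<TinyPool> pool_;
};

int main() {
  Runner single(0);        // 0 is the new default in this commit
  single.RunOp([] { std::printf("op ran on the calling thread\n"); });
}
```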
@@ -669,7 +669,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const {
 void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
-  bool use_event = false;
+  bool use_event = true;
   auto fetched_data = std::make_shared<FetchedData>(fetch_tensors.size());
   // Version --> VarHandle
   member_->exception_.reset();
......
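
Note: turning `use_event` on in `Run` presumably switches cross-stream ordering from host-side waits to recorded CUDA events, which matches the "Use event" half of the commit title. A sketch of that general pattern using the standard CUDA runtime API (the function and stream names below are made up for illustration; this is not the ParallelExecutor code path):

```cpp
#include <cuda_runtime.h>

void OrderAcrossStreams(cudaStream_t producer, cudaStream_t consumer) {
  cudaEvent_t done;
  // Timing is not needed; a lighter-weight event suffices for ordering.
  cudaEventCreateWithFlags(&done, cudaEventDisableTiming);

  // ... producer kernel launched on `producer` here ...

  cudaEventRecord(done, producer);         // mark the producer's progress
  cudaStreamWaitEvent(consumer, done, 0);  // consumer waits on-device,
                                           // without blocking a host thread

  // ... consumer kernel launched on `consumer` here ...

  cudaEventDestroy(done);
}
```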
@@ -90,7 +90,6 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
 template <>
 void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
   auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
-  VLOG(30) << "Allocating " << size << " bytes on " << place;
   auto* ptr = buddy_allocator->Alloc(size);
   if (ptr == nullptr) {
     int cur_dev = platform::GetCurrentDeviceId();
......