提交 d26f093f 编写于 作者: Y Yu Yang

Log

上级 d55a03d9
...@@ -132,9 +132,13 @@ struct ScaleLossGradOpHandle : public OpHandle { ...@@ -132,9 +132,13 @@ struct ScaleLossGradOpHandle : public OpHandle {
scope_(scope), scope_(scope),
place_(place) { place_(place) {
PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming));
VLOG(3) << "Create " << ev_;
} }
~ScaleLossGradOpHandle() { PADDLE_ENFORCE(cudaEventDestroy(ev_)); } ~ScaleLossGradOpHandle() {
VLOG(3) << "Destroy " << ev_;
PADDLE_ENFORCE(cudaEventDestroy(ev_));
}
void Run() override { void Run() override {
std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_; std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
...@@ -146,20 +150,13 @@ struct ScaleLossGradOpHandle : public OpHandle { ...@@ -146,20 +150,13 @@ struct ScaleLossGradOpHandle : public OpHandle {
if (platform::is_cpu_place(place_)) { if (platform::is_cpu_place(place_)) {
*tmp = coeff_; *tmp = coeff_;
} else { } else {
VLOG(3) << "Scale loss on place" << place_;
auto stream = auto stream =
static_cast<platform::CUDADeviceContext *>(this->dev_ctx_[place_]) static_cast<platform::CUDADeviceContext *>(this->dev_ctx_[place_])
->stream(); ->stream();
cudaSetDevice(boost::get<platform::CUDAPlace>(place_).device); cudaSetDevice(boost::get<platform::CUDAPlace>(place_).device);
VLOG(3) << "1";
PADDLE_ENFORCE(cudaGetLastError());
VLOG(3) << "2";
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp, memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
platform::CPUPlace(), &coeff_, sizeof(float), stream); platform::CPUPlace(), &coeff_, sizeof(float), stream);
PADDLE_ENFORCE(cudaDeviceSynchronize());
VLOG(3) << "3";
PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); PADDLE_ENFORCE(cudaEventRecord(ev_, stream));
VLOG(3) << "4";
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册