From d1b25ed9d73da2ac85d4cde3eab6b883235f5384 Mon Sep 17 00:00:00 2001
From: wanghuancoder
Date: Wed, 20 Jan 2021 09:37:53 +0800
Subject: [PATCH] add some RecordEvent, for dygraph timeline (#30299)

* add some RecordEvent, for dygraph timeline, test=develop

* change GpuMemcpySync to memory::Copy, test=develop

* fix compile problem, test=develop

* fix compile problem, test=develop

* fix, test=develop

* fix, test=develop
---
 paddle/fluid/imperative/basic_engine.cc | 2 ++
 paddle/fluid/imperative/layer.cc        | 1 +
 paddle/fluid/imperative/tracer.cc       | 1 +
 paddle/fluid/pybind/tensor_py.h         | 7 +++++--
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 990937647ac..29ba5498680 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -281,6 +281,8 @@ void BasicEngine::Execute() {
     auto& inplace_grad_name_map = shared_cur_node->InplaceGradNameMap();
 
     for (auto& cur_op : *shared_cur_node) {
+      platform::RecordEvent op_type_record_event(cur_op.Type());
+
       ++op_num;
 
       // CheckBackWardInput
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 3123d4b5077..365dbbfa125 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -206,6 +206,7 @@ void VarBase::ClearGradient() {
         grad_t->mutable_value()->clear();
       }
     } else {
+      platform::RecordEvent record_event("ClearGradient");
       auto* grad_t =
           grad_var_->MutableVar()->GetMutable<framework::LoDTensor>();
       if (grad_t->IsInitialized()) {
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index e5d664070e1..1cf94c7a79e 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -133,6 +133,7 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
                      const NameVarBaseMap& outs, framework::AttributeMap attrs,
                      const platform::Place& place, bool trace_backward,
                      const std::map<std::string, std::string>& inplace_map) {
+  platform::RecordEvent op_type_record_event(type);
   VLOG(1) << "Trace Op: " << type;
   if (FLAGS_use_mkldnn) {
     // if both lists are empty all ops are enabled (default for
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 7e60c98dc18..6d1281d11f1 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -29,6 +29,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/profiler.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
@@ -293,6 +294,7 @@ void SetTensorFromPyArrayT(
     auto dst = self->mutable_data<T>(place);
     paddle::platform::GpuMemcpySync(dst, array.data(), array.nbytes(),
                                     cudaMemcpyHostToDevice);
+
   } else if (paddle::platform::is_cuda_pinned_place(place)) {
     auto dst = self->mutable_data<T>(place);
     std::memcpy(dst, array.data(), array.nbytes());
@@ -706,8 +708,9 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor,
             "or double free would occur"));
 
     size_t copy_bytes = sizeof_dtype * numel;
-    paddle::platform::GpuMemcpySync(py_arr.mutable_data(), tensor_buf_ptr,
-                                    copy_bytes, cudaMemcpyDeviceToHost);
+    auto p = BOOST_GET_CONST(platform::CUDAPlace, tensor.place());
+    paddle::memory::Copy(platform::CPUPlace(), py_arr.mutable_data(), p,
+                         tensor_buf_ptr, copy_bytes, nullptr);
     return py_arr;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-- 
GitLab
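
Note on the RecordEvent additions: platform::RecordEvent is a scoped
profiler marker, so constructing it at the top of Tracer::TraceOp (and at
the top of the backward loop in BasicEngine::Execute) times the whole
enclosing scope and emits one timeline entry per op. Below is a minimal
sketch of the RAII pattern this relies on; aside from the variable name
taken from the patch, everything here (class name, printing backend) is
an illustrative stand-in, not Paddle's actual implementation, which
reports to its profiler instead of stdout.

    #include <chrono>
    #include <iostream>
    #include <string>

    // Simplified stand-in for platform::RecordEvent: the constructor
    // stamps a start time and the destructor stamps the end, so the
    // recorded event covers exactly the scope the object lives in.
    class ScopedRecordEvent {
     public:
      explicit ScopedRecordEvent(const std::string& name)
          : name_(name), start_(std::chrono::steady_clock::now()) {}

      ~ScopedRecordEvent() {
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - start_)
                      .count();
        // A real profiler would append a timeline entry; we just print.
        std::cout << name_ << ": " << us << " us\n";
      }

     private:
      std::string name_;
      std::chrono::steady_clock::time_point start_;
    };

    void TraceOp(const std::string& type) {
      ScopedRecordEvent op_type_record_event(type);  // times the whole call
      // ... build and run the op ...
    }

Because the destructor does the end-timestamping, no explicit "stop"
call is needed on any early-return path, which is why one declaration
at the top of the function is enough.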
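Note on the GpuMemcpySync change in TensorToPyArray: paddle::memory::Copy
dispatches on the (destination place, source place) pair, and for a
CUDAPlace-to-CPUPlace copy its final argument is a CUDA stream; a non-null
stream requests an asynchronous copy, while nullptr (as passed here) keeps
the copy blocking, preserving the old GpuMemcpySync behaviour. A sketch of
that convention under the usual CUDA pattern; the helper below is
hypothetical, not Paddle's source.

    #include <cuda_runtime.h>

    // Hypothetical device-to-host copy helper mirroring the convention
    // the patch relies on: a null stream means a blocking cudaMemcpy,
    // a non-null stream means an async copy enqueued on that stream.
    void CopyGpuToCpu(void* dst, const void* src, size_t num,
                      cudaStream_t stream) {
      if (stream != nullptr) {
        cudaMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
      } else {
        cudaMemcpy(dst, src, num, cudaMemcpyDeviceToHost);  // synchronous
      }
    }

Routing the copy through the place-aware memory::Copy API rather than the
raw GpuMemcpySync wrapper keeps the call site uniform across device types
while leaving the synchronous semantics unchanged.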