From d1b25ed9d73da2ac85d4cde3eab6b883235f5384 Mon Sep 17 00:00:00 2001
From: wanghuancoder
Date: Wed, 20 Jan 2021 09:37:53 +0800
Subject: [PATCH] add some RecordEvent, for dygraph timeline (#30299)

* add some RecordEvent, for dygraph timeline, test=develop

* change GpuMemcpySync to memory::Copy, test=develop

* fix compile problem, test=develop

* fix compile problem, test=develop

* fix, test=develop

* fix, test=develop
---
 paddle/fluid/imperative/basic_engine.cc | 2 ++
 paddle/fluid/imperative/layer.cc        | 1 +
 paddle/fluid/imperative/tracer.cc       | 1 +
 paddle/fluid/pybind/tensor_py.h         | 7 +++++--
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 990937647a..29ba549868 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -281,6 +281,8 @@ void BasicEngine::Execute() {
     auto& inplace_grad_name_map = shared_cur_node->InplaceGradNameMap();
 
     for (auto& cur_op : *shared_cur_node) {
+      platform::RecordEvent op_type_record_event(cur_op.Type());
+
       ++op_num;
 
       // CheckBackWardInput
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 3123d4b507..365dbbfa12 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -206,6 +206,7 @@ void VarBase::ClearGradient() {
         grad_t->mutable_value()->clear();
       }
     } else {
+      platform::RecordEvent record_event("ClearGradient");
       auto* grad_t =
           grad_var_->MutableVar()->GetMutable<framework::LoDTensor>();
       if (grad_t->IsInitialized()) {
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index e5d664070e..1cf94c7a79 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -133,6 +133,7 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
                     const NameVarBaseMap& outs, framework::AttributeMap attrs,
                     const platform::Place& place, bool trace_backward,
                     const std::map<std::string, std::string>& inplace_map) {
+  platform::RecordEvent op_type_record_event(type);
   VLOG(1) << "Trace Op: " << type;
   if (FLAGS_use_mkldnn) {
     // if both lists are empty all ops are enabled (default for
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 7e60c98dc1..6d1281d11f 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -29,6 +29,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/profiler.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 
@@ -293,6 +294,7 @@ void SetTensorFromPyArrayT(
       auto dst = self->mutable_data<T>(place);
       paddle::platform::GpuMemcpySync(dst, array.data(), array.nbytes(),
                                       cudaMemcpyHostToDevice);
+
     } else if (paddle::platform::is_cuda_pinned_place(place)) {
       auto dst = self->mutable_data<T>(place);
       std::memcpy(dst, array.data(), array.nbytes());
@@ -706,8 +708,9 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor,
                           "or double free would occur"));
 
     size_t copy_bytes = sizeof_dtype * numel;
-    paddle::platform::GpuMemcpySync(py_arr.mutable_data(), tensor_buf_ptr,
-                                    copy_bytes, cudaMemcpyDeviceToHost);
+    auto p = BOOST_GET_CONST(platform::CUDAPlace, tensor.place());
+    paddle::memory::Copy(platform::CPUPlace(), py_arr.mutable_data(), p,
+                         tensor_buf_ptr, copy_bytes, nullptr);
     return py_arr;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
--
GitLab
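
Note on the pattern used above (an illustrative sketch, not part of the
patch): platform::RecordEvent is an RAII scope guard. Constructing it marks
the start of a profiler range named after the op type, and its destructor at
the end of the enclosing scope closes the range, so each traced op appears as
one interval on the dygraph timeline. The stand-alone class below shows the
same idea with std::chrono; the name ScopedRecordEvent and its printed output
are assumptions for illustration, not Paddle's actual profiler API.

    #include <chrono>
    #include <iostream>
    #include <string>
    #include <utility>

    // Hypothetical stand-in for platform::RecordEvent: an RAII scope guard
    // that times the enclosing scope.
    class ScopedRecordEvent {
     public:
      explicit ScopedRecordEvent(std::string name)
          : name_(std::move(name)),
            start_(std::chrono::steady_clock::now()) {}

      ~ScopedRecordEvent() {
        auto end = std::chrono::steady_clock::now();
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      end - start_).count();
        // A real profiler would append (name, start, end) to a timeline
        // buffer instead of printing.
        std::cout << name_ << ": " << us << " us\n";
      }

     private:
      std::string name_;
      std::chrono::steady_clock::time_point start_;
    };

    int main() {
      ScopedRecordEvent op_type_record_event("matmul");  // mirrors TraceOp
      // ... op execution being timed ...
      return 0;
    }

The tensor_py.h hunk likewise swaps a raw GpuMemcpySync for memory::Copy,
which dispatches on the (dst place, src place) pair; in Paddle of this
vintage, passing nullptr as the stream argument selects a synchronous copy,
preserving the old blocking behavior.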