fix d-chip watchpoints, latest value for GPU inputs

35695132 · John Tzanakakis · 2905fb8c · 35695132 · 35695132 · 35695132
3 changed files
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -172,8 +172,13 @@ void Debugger::PostExecute() {
    return;
  }
  if (debugger_enabled_ && !is_dataset_graph_) {
-    MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
-    CommandLoop();
+    if (device_target_ != kGPUDevice) {
+      num_step_++;
+      MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
+      SendWatchpointsAndSuspend(CheckWatchpoints());
+    } else {
+      CommandLoop();
+    }
  }
 }


--- a/mindspore/ccsrc/debug/tensor_load.h
+++ b/mindspore/ccsrc/debug/tensor_load.h
@@ -46,7 +46,7 @@ class TensorLoader {
      }
    }
    tensor_list.push_back(tensor);
-    tensor_list_map.insert({tensor->GetName(), tensor});
+    tensor_list_map[tensor->GetName()] = tensor;  // use [] instead of insert to ensure latest value
    auto node_name = tensor->GetName();
    node_name = node_name.substr(0, node_name.find_first_of(":"));
    node_tensor_map.insert({node_name, tensor});

--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -241,25 +241,23 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
  }

  // get inputs
-  if (!dump_enabled) {
-    auto input_size = AnfAlgo::GetInputTensorNum(kernel);
-    for (size_t j = 0; j < input_size; ++j) {
-      auto input_kernel = kernel->input(j + 1);
-      std::string input_kernel_name = input_kernel->fullname_with_scope();
-      auto addr = kernel_inputs[j];
-      auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
-      auto format = kOpFormat_DEFAULT;
-      auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
-      string input_tensor_name = input_kernel_name + ':' + "0";
-      std::vector<int> int_shapes;
-      auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
-      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
-                           [](size_t inner_item) { return SizeToInt(inner_item); });
-      auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, false);
-      if (!ret) {
-        MS_LOG(ERROR) << "LoadMemToHost:"
-                      << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
-      }
+  auto input_size = AnfAlgo::GetInputTensorNum(kernel);
+  for (size_t j = 0; j < input_size; ++j) {
+    auto input_kernel = kernel->input(j + 1);
+    std::string input_kernel_name = input_kernel->fullname_with_scope();
+    auto addr = kernel_inputs[j];
+    auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
+    auto format = kOpFormat_DEFAULT;
+    auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
+    string input_tensor_name = input_kernel_name + ':' + "0";
+    std::vector<int> int_shapes;
+    auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
+    (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
+                         [](size_t inner_item) { return SizeToInt(inner_item); });
+    auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, false);
+    if (!ret) {
+      MS_LOG(ERROR) << "LoadMemToHost:"
+                    << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
    }
  }