// test_event_node.cc
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gtest/gtest.h"
#include "paddle/fluid/platform/profiler/chrometracing_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h"

// Names from paddle::platform used by the tests in this file.
using paddle::platform::ChromeTracingLogger;
using paddle::platform::CudaRuntimeTraceEventNode;
using paddle::platform::DeviceTraceEvent;
using paddle::platform::DeviceTraceEventNode;
using paddle::platform::HostTraceEvent;
using paddle::platform::HostTraceEventNode;
using paddle::platform::KernelEventInfo;
using paddle::platform::MemcpyEventInfo;
using paddle::platform::MemsetEventInfo;
using paddle::platform::MemTraceEvent;
using paddle::platform::MemTraceEventNode;
using paddle::platform::NodeTrees;
using paddle::platform::OperatorSupplementEvent;
using paddle::platform::OperatorSupplementEventNode;
using paddle::platform::RuntimeTraceEvent;
using paddle::platform::TracerEventType;
using paddle::platform::TracerMemEventType;

// Builds a NodeTrees from host, runtime, device, memory and operator
// supplement events, checks the structure returned by Traverse(true), and then
// logs the tree and meta info through a ChromeTracingLogger writing to
// "test_nodetrees_logme_case0.json".
//
// NOTE(review): the traversal result is keyed by 10 and 11, which match the
// last argument of the HostTraceEvent constructors below; presumably that
// argument is the thread id -- confirm against the event definitions.
TEST(NodeTreesTest, LogMe_case0) {
  std::list<HostTraceEvent> host_events;
  std::list<RuntimeTraceEvent> runtime_events;
  std::list<DeviceTraceEvent> device_events;
  std::list<MemTraceEvent> mem_events;
  std::list<OperatorSupplementEvent> op_supplement_events;

  // Host events: three keyed under 10 and one ("op3") keyed under 11.
  host_events.push_back(HostTraceEvent(std::string("dataloader#1"),
                                       TracerEventType::Dataloader, 1000, 10000,
                                       10, 10));
  host_events.push_back(HostTraceEvent(
      std::string("op1"), TracerEventType::Operator, 11000, 20000, 10, 10));
  host_events.push_back(HostTraceEvent(
      std::string("op2"), TracerEventType::Operator, 21000, 30000, 10, 10));
  host_events.push_back(HostTraceEvent(
      std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));

  // Memory events: one allocate and one free whose first arguments (11500 and
  // 11900) lie between op1's 11000 and 20000.
  mem_events.push_back(MemTraceEvent(11500, 0x1000,
                                     TracerMemEventType::Allocate, 10, 10, 50,
                                     "GPU:0", 50, 50));
  mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
                                     10, 10, -50, "GPU:0", 0, 50));

  // Operator supplement event for "op1" carrying two shapes and two dtypes
  // under the key "X".
  std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
  std::map<std::string, std::vector<std::string>> dtypes;
  input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
  input_shapes[std::string("X")].push_back(std::vector<int64_t>{4, 5, 6, 7});
  dtypes[std::string("X")].push_back(std::string("int8"));
  dtypes[std::string("X")].push_back(std::string("float32"));
  op_supplement_events.push_back(OperatorSupplementEvent(
      11600, "op1", input_shapes, dtypes, "op1()", 10, 10));

  // Runtime events. The second-to-last argument (1..5) is repeated as the
  // second-to-last argument of exactly one device event below.
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch1"), 15000,
                                             17000, 10, 10, 1, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch2"), 25000,
                                             35000, 10, 10, 2, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch3"), 33000,
                                             37000, 10, 11, 3, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudaMemcpy1"), 18000,
                                             19000, 10, 10, 4, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudaMemset1"), 38000,
                                             39000, 10, 11, 5, 0));

  // Device events: three kernels, one memcpy and one memset.
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel1"), TracerEventType::Kernel, 40000,
                       55000, 0, 10, 10, 1, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel2"), TracerEventType::Kernel, 70000,
                       95000, 0, 10, 10, 2, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel3"), TracerEventType::Kernel, 60000,
                       65000, 0, 10, 11, 3, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("memcpy1"), TracerEventType::Memcpy, 56000,
                       59000, 0, 10, 10, 4, MemcpyEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("memset1"), TracerEventType::Memset, 66000,
                       69000, 0, 10, 11, 5, MemsetEventInfo()));

  ChromeTracingLogger logger("test_nodetrees_logme_case0.json");
  NodeTrees tree(host_events, runtime_events, device_events, mem_events,
                 op_supplement_events);
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree.Traverse(true);
  EXPECT_EQ(nodes[10].size(), 4u);
  EXPECT_EQ(nodes[11].size(), 2u);

  // Bind by reference instead of copying the vectors; references to std::map
  // elements remain valid when other keys are accessed.
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  // The checks below are keyed on the node name and are skipped when no node
  // with that name is present in the traversal result.
  for (HostTraceEventNode* node : thread1_nodes) {
    const auto& node_name = node->Name();
    if (node_name == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 3u);
    }
    if (node_name == "op1") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
      EXPECT_EQ(node->GetMemTraceEventNodes().size(), 2u);
      EXPECT_NE(node->GetOperatorSupplementEventNode(), nullptr);
    }
  }
  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "op3") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }

  tree.LogMe(&logger);
  logger.LogMetaInfo(std::unordered_map<std::string, std::string>());
}

// Builds a NodeTrees with no host, memory or operator supplement events (only
// runtime and device events), checks the structure returned by Traverse(true),
// and then logs the tree and meta info through a ChromeTracingLogger writing
// to "test_nodetrees_logme_case1.json".
//
// The expectations check that keys 10 and 11 each hold a single node, and
// that a node named "root node", when present, carries the runtime events
// directly.
TEST(NodeTreesTest, LogMe_case1) {
  std::list<HostTraceEvent> host_events;  // intentionally left empty
  std::list<RuntimeTraceEvent> runtime_events;
  std::list<DeviceTraceEvent> device_events;
  std::list<MemTraceEvent> mem_events;  // intentionally left empty
  std::list<OperatorSupplementEvent>
      op_supplement_events;  // intentionally left empty

  // Runtime events. The second-to-last argument (1..5) is repeated as the
  // second-to-last argument of exactly one device event below.
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch1"), 15000,
                                             17000, 10, 10, 1, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch2"), 25000,
                                             35000, 10, 10, 2, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch3"), 33000,
                                             37000, 10, 11, 3, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudaMemcpy1"), 18000,
                                             19000, 10, 10, 4, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudaMemset1"), 38000,
                                             39000, 10, 11, 5, 0));

  // Device events: three kernels, one memcpy and one memset.
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel1"), TracerEventType::Kernel, 40000,
                       55000, 0, 10, 10, 1, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel2"), TracerEventType::Kernel, 70000,
                       95000, 0, 10, 10, 2, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel3"), TracerEventType::Kernel, 60000,
                       65000, 0, 10, 11, 3, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("memcpy1"), TracerEventType::Memcpy, 56000,
                       59000, 0, 10, 10, 4, MemcpyEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("memset1"), TracerEventType::Memset, 66000,
                       69000, 0, 10, 11, 5, MemsetEventInfo()));

  ChromeTracingLogger logger("test_nodetrees_logme_case1.json");
  NodeTrees tree(host_events, runtime_events, device_events, mem_events,
                 op_supplement_events);
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree.Traverse(true);
  EXPECT_EQ(nodes[10].size(), 1u);
  EXPECT_EQ(nodes[11].size(), 1u);

  // Bind by reference instead of copying the vectors; references to std::map
  // elements remain valid when other keys are accessed.
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  // The checks below are keyed on the node name and are skipped when no node
  // named "root node" is present in the traversal result.
  for (HostTraceEventNode* node : thread1_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 3u);
    }
  }
  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }

  tree.LogMe(&logger);
  logger.LogMetaInfo(std::unordered_map<std::string, std::string>());
}

// Builds a NodeTrees in which "op2" (30000-70000) lies inside "op1"
// (10000-100000), checks the structure returned by Traverse(true), and then
// walks the tree with NodeTrees::HandleTrees using callbacks that forward each
// node to a ChromeTracingLogger writing to
// "test_nodetrees_handletrees_case0.json".
TEST(NodeTreesTest, HandleTrees_case0) {
  std::list<HostTraceEvent> host_events;
  std::list<RuntimeTraceEvent> runtime_events;
  std::list<DeviceTraceEvent> device_events;
  std::list<MemTraceEvent> mem_events;
  std::list<OperatorSupplementEvent> op_supplement_events;

  // Host events: op1 and op2 keyed under 10, op3 keyed under 11.
  host_events.push_back(HostTraceEvent(
      std::string("op1"), TracerEventType::Operator, 10000, 100000, 10, 10));
  host_events.push_back(HostTraceEvent(
      std::string("op2"), TracerEventType::Operator, 30000, 70000, 10, 10));
  host_events.push_back(HostTraceEvent(
      std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11));

  // Memory events: one allocate and one free.
  mem_events.push_back(MemTraceEvent(11500, 0x1000,
                                     TracerMemEventType::Allocate, 10, 10, 50,
                                     "GPU:0", 50, 50));
  mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
                                     10, 10, -50, "GPU:0", 0, 50));

  // Operator supplement event for "op1" with empty shape and dtype maps.
  op_supplement_events.push_back(OperatorSupplementEvent(
      11600, "op1", std::map<std::string, std::vector<std::vector<int64_t>>>(),
      std::map<std::string, std::vector<std::string>>(), "op1()", 10, 10));

  // Runtime events. The second-to-last argument (1..3) is repeated as the
  // second-to-last argument of exactly one device event below.
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch1"), 15000,
                                             25000, 10, 10, 1, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch2"), 35000,
                                             45000, 10, 10, 2, 0));
  runtime_events.push_back(RuntimeTraceEvent(std::string("cudalaunch3"), 10000,
                                             55000, 10, 11, 3, 0));

  // Device events: three kernels.
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel1"), TracerEventType::Kernel, 40000,
                       55000, 0, 10, 10, 1, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel2"), TracerEventType::Kernel, 70000,
                       95000, 0, 10, 10, 2, KernelEventInfo()));
  device_events.push_back(
      DeviceTraceEvent(std::string("kernel3"), TracerEventType::Kernel, 60000,
                       75000, 0, 10, 11, 3, KernelEventInfo()));

  ChromeTracingLogger logger("test_nodetrees_handletrees_case0.json");
  NodeTrees tree(host_events, runtime_events, device_events, mem_events,
                 op_supplement_events);
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree.Traverse(true);
  EXPECT_EQ(nodes[10].size(), 3u);
  EXPECT_EQ(nodes[11].size(), 2u);

  // Bind by reference instead of copying the vectors; references to std::map
  // elements remain valid when other keys are accessed.
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  // The checks below are keyed on the node name and are skipped when no node
  // with that name is present in the traversal result.
  for (HostTraceEventNode* node : thread1_nodes) {
    const auto& node_name = node->Name();
    if (node_name == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 1u);
    }
    if (node_name == "op1") {
      EXPECT_EQ(node->GetChildren().size(), 1u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 1u);
    }
  }
  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "op3") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 1u);
    }
  }

  // Per-node callbacks: each one forwards the visited node to the logger,
  // except the operator supplement callback, which is a deliberate no-op.
  std::function<void(HostTraceEventNode*)> host_event_node_handle(
      [&logger](HostTraceEventNode* a) { logger.LogHostTraceEventNode(*a); });
  std::function<void(CudaRuntimeTraceEventNode*)> runtime_event_node_handle(
      [&logger](CudaRuntimeTraceEventNode* a) {
        logger.LogRuntimeTraceEventNode(*a);
      });
  std::function<void(DeviceTraceEventNode*)> device_event_node_handle(
      [&logger](DeviceTraceEventNode* a) {
        logger.LogDeviceTraceEventNode(*a);
      });
  std::function<void(MemTraceEventNode*)> mem_event_node_handle(
      [&logger](MemTraceEventNode* a) { logger.LogMemTraceEventNode(*a); });
  std::function<void(OperatorSupplementEventNode*)>
      op_supplement_event_node_handle([](OperatorSupplementEventNode*) {});

  tree.HandleTrees(host_event_node_handle, runtime_event_node_handle,
                   device_event_node_handle, mem_event_node_handle,
                   op_supplement_event_node_handle);
  logger.LogMetaInfo(std::unordered_map<std::string, std::string>());
}