// test_serialization_logger.cc
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gtest/gtest.h"
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h"
C
chenjian 已提交
19
#include "paddle/fluid/platform/profiler/event_python.h"
C
chenjian 已提交
20 21

using paddle::platform::CudaRuntimeTraceEventNode;
22 23
using paddle::platform::DeserializationReader;
using paddle::platform::DeviceTraceEvent;
C
chenjian 已提交
24 25
using paddle::platform::DeviceTraceEventNode;
using paddle::platform::HostTraceEvent;
26
using paddle::platform::HostTraceEventNode;
C
chenjian 已提交
27 28 29
using paddle::platform::KernelEventInfo;
using paddle::platform::MemcpyEventInfo;
using paddle::platform::MemsetEventInfo;
C
chenjian 已提交
30
using paddle::platform::MemTraceEvent;
31
using paddle::platform::NodeTrees;
C
chenjian 已提交
32
using paddle::platform::OperatorSupplementEvent;
C
chenjian 已提交
33
using paddle::platform::ProfilerResult;
34 35 36
using paddle::platform::RuntimeTraceEvent;
using paddle::platform::SerializationLogger;
using paddle::platform::TracerEventType;
37
using paddle::platform::TracerMemEventType;
C
chenjian 已提交
38 39 40 41 42

// Builds a NodeTrees from hand-written host, memory, operator-supplement,
// runtime and device events, verifies the per-key node lists returned by
// Traverse(true), and finally logs the tree through a SerializationLogger
// created for "test_serialization_logger_case0.pb".
TEST(SerializationLoggerTest, dump_case0) {
  // Host events: one dataloader and three operators. The last operator uses
  // 11 as its final id argument while the others use 10.
  std::list<HostTraceEvent> host_events;
  host_events.emplace_back(std::string("dataloader#1"),
                           TracerEventType::Dataloader, 1000, 10000, 10, 10);
  host_events.emplace_back(std::string("op1"), TracerEventType::Operator,
                           11000, 20000, 10, 10);
  host_events.emplace_back(std::string("op2"), TracerEventType::Operator,
                           21000, 30000, 10, 10);
  host_events.emplace_back(std::string("op3"), TracerEventType::Operator,
                           31000, 40000, 10, 11);

  // Memory events: an allocate/free pair whose timestamps (11500, 11900) lie
  // inside op1's [11000, 20000] interval.
  std::list<MemTraceEvent> mem_events;
  mem_events.emplace_back(11500, 0x1000, TracerMemEventType::Allocate, 10, 10,
                          50, "GPU:0", 50, 50);
  mem_events.emplace_back(11900, 0x1000, TracerMemEventType::Free, 10, 10, -50,
                          "GPU:0", 0, 50);

  // Operator supplement for "op1": two input shapes and two dtypes under "X".
  std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
  std::map<std::string, std::vector<std::string>> dtypes;
  std::vector<std::vector<int64_t>>& x_shapes = input_shapes[std::string("X")];
  x_shapes.push_back(std::vector<int64_t>{1, 2, 3});
  x_shapes.push_back(std::vector<int64_t>{4, 5, 6, 7});
  std::vector<std::string>& x_dtypes = dtypes[std::string("X")];
  x_dtypes.push_back(std::string("int8"));
  x_dtypes.push_back(std::string("float32"));
  std::list<OperatorSupplementEvent> op_supplement_events;
  op_supplement_events.emplace_back(11600, "op1", input_shapes, dtypes,
                                    "op1()", 10, 10);

  // Runtime (CUDA API) events.
  std::list<RuntimeTraceEvent> runtime_events;
  runtime_events.emplace_back(std::string("cudalaunch1"), 15000, 17000, 10, 10,
                              1, 0);
  runtime_events.emplace_back(std::string("cudalaunch2"), 25000, 35000, 10, 10,
                              2, 0);
  runtime_events.emplace_back(std::string("cudalaunch3"), 33000, 37000, 10, 11,
                              3, 0);
  runtime_events.emplace_back(std::string("cudaMemcpy1"), 18000, 19000, 10, 10,
                              4, 0);
  runtime_events.emplace_back(std::string("cudaMemset1"), 38000, 39000, 10, 11,
                              5, 0);

  // Device events; the second-to-last argument (1..5) mirrors the
  // second-to-last argument of the runtime events above.
  std::list<DeviceTraceEvent> device_events;
  device_events.emplace_back(std::string("kernel1"), TracerEventType::Kernel,
                             40000, 55000, 0, 10, 10, 1, KernelEventInfo());
  device_events.emplace_back(std::string("kernel2"), TracerEventType::Kernel,
                             70000, 95000, 0, 10, 10, 2, KernelEventInfo());
  device_events.emplace_back(std::string("kernel3"), TracerEventType::Kernel,
                             60000, 65000, 0, 10, 11, 3, KernelEventInfo());
  device_events.emplace_back(std::string("memcpy1"), TracerEventType::Memcpy,
                             56000, 59000, 0, 10, 10, 4, MemcpyEventInfo());
  device_events.emplace_back(std::string("memset1"), TracerEventType::Memset,
                             66000, 69000, 0, 10, 11, 5, MemsetEventInfo());

  SerializationLogger logger("test_serialization_logger_case0.pb");
  NodeTrees tree(host_events, runtime_events, device_events, mem_events,
                 op_supplement_events);

  // The test treats keys 10 and 11 of the traversal result as two threads.
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree.Traverse(true);
  EXPECT_EQ(nodes[10].size(), 4u);
  EXPECT_EQ(nodes[11].size(), 2u);
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  for (HostTraceEventNode* node : thread1_nodes) {
    const std::string node_name = node->Name();
    if (node_name == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 3u);
    } else if (node_name == "op1") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
      EXPECT_EQ(node->GetMemTraceEventNodes().size(), 2u);
      EXPECT_NE(node->GetOperatorSupplementEventNode(), nullptr);
    }
  }

  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "op3") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }

  // Hand the tree to the logger, then log an empty meta-info map.
  tree.LogMe(&logger);
  logger.LogMetaInfo(std::unordered_map<std::string, std::string>());
}

// Same scenario as dump_case0 but with no host, memory or operator-supplement
// events: only runtime and device events are supplied. The traversal result is
// checked and the tree is logged through a SerializationLogger created for
// "test_serialization_logger_case1.pb".
TEST(SerializationLoggerTest, dump_case1) {
  // Intentionally left empty for this case.
  std::list<HostTraceEvent> host_events;
  std::list<MemTraceEvent> mem_events;
  std::list<OperatorSupplementEvent> op_supplement_events;

  // Runtime (CUDA API) events.
  std::list<RuntimeTraceEvent> runtime_events;
  runtime_events.emplace_back(std::string("cudalaunch1"), 15000, 17000, 10, 10,
                              1, 0);
  runtime_events.emplace_back(std::string("cudalaunch2"), 25000, 35000, 10, 10,
                              2, 0);
  runtime_events.emplace_back(std::string("cudalaunch3"), 33000, 37000, 10, 11,
                              3, 0);
  runtime_events.emplace_back(std::string("cudaMemcpy1"), 18000, 19000, 10, 10,
                              4, 0);
  runtime_events.emplace_back(std::string("cudaMemset1"), 38000, 39000, 10, 11,
                              5, 0);

  // Device events; the second-to-last argument (1..5) mirrors the
  // second-to-last argument of the runtime events above.
  std::list<DeviceTraceEvent> device_events;
  device_events.emplace_back(std::string("kernel1"), TracerEventType::Kernel,
                             40000, 55000, 0, 10, 10, 1, KernelEventInfo());
  device_events.emplace_back(std::string("kernel2"), TracerEventType::Kernel,
                             70000, 95000, 0, 10, 10, 2, KernelEventInfo());
  device_events.emplace_back(std::string("kernel3"), TracerEventType::Kernel,
                             60000, 65000, 0, 10, 11, 3, KernelEventInfo());
  device_events.emplace_back(std::string("memcpy1"), TracerEventType::Memcpy,
                             56000, 59000, 0, 10, 10, 4, MemcpyEventInfo());
  device_events.emplace_back(std::string("memset1"), TracerEventType::Memset,
                             66000, 69000, 0, 10, 11, 5, MemsetEventInfo());

  SerializationLogger logger("test_serialization_logger_case1.pb");
  NodeTrees tree(host_events, runtime_events, device_events, mem_events,
                 op_supplement_events);

  // The test treats keys 10 and 11 of the traversal result as two threads;
  // each is expected to hold exactly one node.
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree.Traverse(true);
  EXPECT_EQ(nodes[10].size(), 1u);
  EXPECT_EQ(nodes[11].size(), 1u);
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  for (HostTraceEventNode* node : thread1_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 3u);
    }
  }

  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }

  // Hand the tree to the logger, then log an empty meta-info map.
  tree.LogMe(&logger);
  logger.LogMetaInfo(std::unordered_map<std::string, std::string>());
}

// Parses "test_serialization_logger_case0.pb" (the file name that dump_case0
// gives to its SerializationLogger) and checks that the restored tree has the
// same shape dump_case0 asserted before logging.
// NOTE(review): this presumably relies on dump_case0 having run first in the
// same working directory - confirm against the test runner configuration.
TEST(DeserializationReaderTest, restore_case0) {
  DeserializationReader reader("test_serialization_logger_case0.pb");
  auto profiler_result = reader.Parse();
  // Stop here with a reported failure rather than dereferencing a null result
  // (for example when the input file is missing or cannot be parsed).
  ASSERT_TRUE(profiler_result != nullptr);
  auto tree = profiler_result->GetNodeTrees();
  ASSERT_TRUE(tree != nullptr);

  // The test treats keys 10 and 11 of the traversal result as two threads.
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree->Traverse(true);
  EXPECT_EQ(nodes[10].size(), 4u);
  EXPECT_EQ(nodes[11].size(), 2u);
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  for (HostTraceEventNode* node : thread1_nodes) {
    const std::string node_name = node->Name();
    if (node_name == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 3u);
    } else if (node_name == "op1") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
      EXPECT_EQ(node->GetMemTraceEventNodes().size(), 2u);
      EXPECT_NE(node->GetOperatorSupplementEventNode(), nullptr);
    }
  }

  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "op3") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }
}
C
chenjian 已提交
205 206 207 208

// Parses "test_serialization_logger_case1.pb" (the file name that dump_case1
// gives to its SerializationLogger) and checks that the restored tree has the
// same shape dump_case1 asserted before logging.
// NOTE(review): this presumably relies on dump_case1 having run first in the
// same working directory - confirm against the test runner configuration.
TEST(DeserializationReaderTest, restore_case1) {
  DeserializationReader reader("test_serialization_logger_case1.pb");
  auto profiler_result = reader.Parse();
  // Stop here with a reported failure rather than dereferencing a null result
  // (for example when the input file is missing or cannot be parsed).
  ASSERT_TRUE(profiler_result != nullptr);
  auto tree = profiler_result->GetNodeTrees();
  ASSERT_TRUE(tree != nullptr);

  // The test treats keys 10 and 11 of the traversal result as two threads;
  // each is expected to hold exactly one node.
  std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
      tree->Traverse(true);
  EXPECT_EQ(nodes[10].size(), 1u);
  EXPECT_EQ(nodes[11].size(), 1u);
  const std::vector<HostTraceEventNode*>& thread1_nodes = nodes[10];
  const std::vector<HostTraceEventNode*>& thread2_nodes = nodes[11];

  for (HostTraceEventNode* node : thread1_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 3u);
    }
  }

  for (HostTraceEventNode* node : thread2_nodes) {
    if (node->Name() == "root node") {
      EXPECT_EQ(node->GetChildren().size(), 0u);
      EXPECT_EQ(node->GetRuntimeTraceEventNodes().size(), 2u);
    }
  }
}