diff --git a/imperative/src/impl/profiler/states.h b/imperative/src/impl/profiler/states.h new file mode 100644 index 0000000000000000000000000000000000000000..ed3feb6c76168650e5688df8af32652e1b6c5fa2 --- /dev/null +++ b/imperative/src/impl/profiler/states.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include + +#include "megbrain/tensor.h" + +namespace mgb::imperative::profiler { + +struct ProfileDeviceState { + int64_t index; + CompNode device; + std::shared_ptr base_event; + uint64_t base_time; //in ns +}; + +struct ProfileWorkerState { + +}; + +struct ProfileTensorState { + uint64_t id; + TensorLayout layout; + CompNode device; + std::string name; + uint64_t produced = 0; + uint64_t living_time = 0; + + size_t size_in_bytes() const { + if (!layout.dtype.valid()) { + return 0; + } + return layout.dtype.size(layout.total_nr_elems()); + } +}; + +struct ProfileStaticsState { + size_t op_enqueue_count = 0; + size_t op_execute_count = 0; + size_t wait_value_count = 0; + size_t wait_shape_count = 0; + size_t exception_count = 0; + size_t infer_shape_valid_count = 0; + size_t infer_shape_invalid_count = 0; + size_t alive_tensor_count = 0; + size_t produce_tensor_count = 0; + size_t erase_tensor_count = 0; + size_t wait_prop_count = 0; + size_t redundant_tensor_count = 0; +}; + +struct ProfileOperatorState { + uint64_t id; + std::string name; + SmallVector inputs; + SmallVector outputs; + CompNode device; + + uint64_t host_begin; + uint64_t host_end; + std::shared_ptr device_begin; + std::shared_ptr device_end; +}; + +struct ProfileThreadState { + std::thread::id tid; + int64_t index; + std::vector scope_stack; +}; + +template +struct ProfileTensorPropPair { + uint64_t id; + TProp value; + + bool operator<(const ProfileTensorPropPair& lhs) const { + return value == lhs.value ? id < lhs.id : value < lhs.value; + } + + bool operator==(const ProfileTensorPropPair& lhs) const { + return id == lhs.id && value == lhs.value; + } + + bool operator>(const ProfileTensorPropPair& lhs) const { + return value == lhs.value ? id > lhs.id : value > lhs.value; + } +}; + +using ProfileTensorSizePair = ProfileTensorPropPair; +using ProfileTensorProducedPair = ProfileTensorPropPair; + +struct GeneralTensorEvent { + uint64_t tensor_id; + std::type_index type; +}; + +struct ProfileState { + std::unordered_map tensors; + std::unordered_map operators; + std::unordered_map tensor_name_counter; + std::set tensors_by_size; + std::set tensors_by_produced; + ProfileWorkerState worker; + ProfileStaticsState statics; + std::unordered_map threads; + CompNode::UnorderedMap devices; + + ProfileThreadState& operator[](std::thread::id tid) { + if (threads.count(tid) == 0) { + threads[tid].tid = tid; + threads[tid].index = threads.size(); + } + return threads[tid]; + } + + ProfileDeviceState& operator[](CompNode device) { + if (devices.count(device) == 0) { + devices[device].device = device; + devices[device].index = devices.size(); + } + return devices[device]; + } + + std::vector top_k_tensor_in_device(CompNode device, size_t k) { + std::vector results; + for (auto iter = tensors_by_size.rbegin(); iter != tensors_by_size.rend(); ++iter) { + if (!k) { + break; + } + if (tensors[iter->id].device == device) { + results.push_back(iter->id); + --k; + } + } + return results; + } + + std::string concat_scope(std::thread::id tid) { + auto& scope_stack = threads[tid].scope_stack; + if (scope_stack.empty()) { + return {}; + } + std::string result = scope_stack[0]; + for (size_t i = 1; i < scope_stack.size(); ++i) { + result += "::"; + result += scope_stack[i]; + } + return result; + } +}; + +}