// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/timer.h"

namespace paddle {
namespace framework {

// Collects every initialized Tensor held by `var` into `tensor_set`,
// unwrapping LoDTensor, SelectedRows and LoDTensorArray variables.
static void GetTensors(Variable* var,
                       std::unordered_set<Tensor*>* tensor_set) {
  if (var->IsType<LoDTensor>() && var->Get<LoDTensor>().IsInitialized()) {
    tensor_set->insert(var->GetMutable<LoDTensor>());
  } else if (var->IsType<SelectedRows>() &&
             var->Get<SelectedRows>().value().IsInitialized()) {
    tensor_set->insert(var->GetMutable<SelectedRows>()->mutable_value());
  } else if (var->IsType<LoDTensorArray>()) {
    auto* tensor_arr = var->GetMutable<LoDTensorArray>();
    for (auto& t : *tensor_arr) {
      if (t.IsInitialized()) {
        tensor_set->insert(&t);
      }
    }
  }
}

// Returns {host_memory_bytes, device_memory_bytes} for the tensors owned by
// `var_list`. Allocations are deduplicated so a buffer shared by several
// tensors is only counted once.
static std::pair<size_t, size_t> GetTensorMemorySize(
    const std::vector<Variable*>& var_list) {
  std::unordered_set<Tensor*> tensor_set;
  for (auto* var : var_list) {
    GetTensors(var, &tensor_set);
  }
  size_t host_memory_bytes = 0;
  size_t device_memory_bytes = 0;
  std::unordered_set<memory::Allocation*> allocation_set;
  for (auto* tensor : tensor_set) {
    auto* allocation = tensor->Holder().get();
    if (!allocation_set.count(allocation)) {
      allocation_set.insert(allocation);
      if (platform::is_cuda_pinned_place(tensor->place()) ||
          platform::is_cpu_place(tensor->place())) {
        VLOG(3) << "found host memory : " << allocation->size();
        host_memory_bytes += allocation->size();
      } else {
        VLOG(3) << "found device memory : " << allocation->size();
        device_memory_bytes += allocation->size();
      }
    }
  }
  return {host_memory_bytes, device_memory_bytes};
}

struct CostInfo {
  double total_time{0.};                // ms
  size_t host_memory_bytes{0};          // bytes
  size_t device_memory_bytes{0};        // bytes
  size_t device_total_memory_bytes{0};  // total allocated memory size
};

class InterpreterProfiler {
 public:
  void Start() { timer_.Start(); }

  void Pause() {
    timer_.Pause();
    cost_info_.total_time += timer_.ElapsedMS();
  }

  void Reset() {
    timer_.Reset();
    cost_info_.total_time = 0.;
    cost_info_.host_memory_bytes = 0;
    cost_info_.device_memory_bytes = 0;
    cost_info_.device_total_memory_bytes = 0;
  }

  // Records the peak host/device tensor memory seen so far. The time spent
  // measuring is subtracted from total_time so profiling does not skew it.
  void ParseMemoryInfo(const std::vector<Variable*>& vars) {
    timer_.Start();
    auto memory_info = GetTensorMemorySize(vars);
    VLOG(3) << "host memory size: " << memory_info.first;
    cost_info_.host_memory_bytes =
        std::max(cost_info_.host_memory_bytes, memory_info.first);

    VLOG(3) << "device memory size: " << memory_info.second;
    cost_info_.device_memory_bytes =
        std::max(cost_info_.device_memory_bytes, memory_info.second);
    timer_.Pause();
    cost_info_.total_time -= timer_.ElapsedMS();
  }

  void TotalCUDAAllocatedMemorySize(const platform::Place& place) {
    if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      auto cuda_place = BOOST_GET_CONST(platform::CUDAPlace, place);
      cost_info_.device_total_memory_bytes =
          platform::RecordedCudaMallocSize(cuda_place.device);
#endif
    }
  }

  const CostInfo& GetCostInfo() const { return cost_info_; }

 private:
  platform::Timer timer_;
  CostInfo cost_info_;
};

}  // namespace framework
}  // namespace paddle
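
// A minimal usage sketch (illustrative, not part of this header): a caller
// is expected to bracket a run with Start()/Pause(), sample memory via
// ParseMemoryInfo(), and read the result from GetCostInfo(). The names
// `scope_vars`, `place`, and `RunInstructions` below are hypothetical.
//
//   InterpreterProfiler profiler;
//   profiler.Reset();
//   profiler.Start();
//   RunInstructions();                             // hypothetical execution step
//   profiler.ParseMemoryInfo(scope_vars);          // std::vector<Variable*>
//   profiler.TotalCUDAAllocatedMemorySize(place);  // no-op on non-GPU places
//   profiler.Pause();                              // accumulates elapsed ms
//   const CostInfo& info = profiler.GetCostInfo();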