From 82eeda69a1cc00b24014bfb920ba470c14f4ef14 Mon Sep 17 00:00:00 2001
From: jameszhang
Date: Fri, 16 Jun 2023 14:55:07 +0800
Subject: [PATCH] [kunlun] support xpu runtime profiler (#54685)

* [kunlun] support xpu runtime profiler

* fix cmake error

* add libxpti.so to paddle package

* fix for style check

* sync change in setup.py and python/setup.py.in

* remove libxpti.so from paddle output dir in this PR
---
 cmake/external/xpu.cmake                     |  26 ++-
 paddle/fluid/platform/dynload/xpti.cc        |  31 +++
 paddle/fluid/platform/dynload/xpti.h         |  43 ++++
 paddle/fluid/platform/profiler/xpu_tracer.cc | 210 +++++++++++++++++++
 paddle/fluid/platform/profiler/xpu_tracer.h  |  52 +++++
 paddle/phi/backends/dynload/xpti.cc          |  32 +++
 paddle/phi/backends/dynload/xpti.h           |  59 ++++++
 7 files changed, 452 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/platform/dynload/xpti.cc
 create mode 100644 paddle/fluid/platform/dynload/xpti.h
 create mode 100644 paddle/fluid/platform/profiler/xpu_tracer.cc
 create mode 100644 paddle/fluid/platform/profiler/xpu_tracer.h
 create mode 100644 paddle/phi/backends/dynload/xpti.cc
 create mode 100644 paddle/phi/backends/dynload/xpti.h

diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 04948cb9f76..9f16373f142 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -7,10 +7,12 @@ set(XPU_PROJECT "extern_xpu")
 set(XPU_API_LIB_NAME "libxpuapi.so")
 set(XPU_RT_LIB_NAME "libxpurt.so")
 set(XPU_XFT_LIB_NAME "libxft.so")
+set(XPU_XPTI_LIB_NAME "libxpti.so") # file name joined with XPU_LIB_DIR to form XPU_XPTI_LIB below
 
 set(XPU_BASE_DATE "20230602")
 set(XPU_XCCL_BASE_VERSION "1.0.49.2")
 set(XPU_XFT_BASE_VERSION "latest")
+set(XPU_XPTI_BASE_VERSION "0.0.1") # last path segment of XPU_XPTI_BASE_URL
 
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
@@ -30,6 +32,10 @@ if(NOT XPU_XFT_BASE_URL)
   )
 endif()
 
+set(XPU_XPTI_BASE_URL
+    "https://klx-sdk-release-public.su.bcebos.com/xpti/dev/${XPU_XPTI_BASE_VERSION}"
+) # always assigned: not wrapped in an if(NOT ...) check like XPU_BASE_URL / XPU_XFT_BASE_URL
+
 if(WITH_XCCL_RDMA)
   set(XPU_XCCL_PREFIX "xccl_rdma")
 else()
@@ -67,6 +73,7 @@ else()
   set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
   set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
 endif()
+set(XPU_XPTI_DIR_NAME "xpti") # archive stem in XPU_XPTI_URL; also passed to get_xpti_dependence.sh
 
 set(XPU_XRE_URL
     "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
@@ -78,12 +85,18 @@ set(XPU_XCCL_URL
     "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
     CACHE STRING "" FORCE)
 set(XPU_XFT_URL "${XPU_XFT_BASE_URL}/${XPU_XFT_DIR_NAME}.tar.gz")
+set(XPU_XPTI_URL
+    "${XPU_XPTI_BASE_URL}/${XPU_XPTI_DIR_NAME}.tar.gz"
+    CACHE STRING "" FORCE) # first argument handed to get_xpti_dependence.sh
 set(XPU_PACK_DEPENCE_URL
     "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh"
     CACHE STRING "" FORCE)
 set(XPU_XFT_GET_DEPENCE_URL
     "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh"
     CACHE STRING "" FORCE)
+set(XPU_XPTI_GET_DEPENCE_URL
+    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xpti_dependence.sh"
+    CACHE STRING "" FORCE) # script fetched with wget in the ExternalProject download step
 
 set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
 set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
@@ -123,7 +136,8 @@ ExternalProject_Add(
     pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL}
     ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} && wget
     ${XPU_XFT_GET_DEPENCE_URL} && bash get_xft_dependence.sh ${XPU_XFT_URL}
-    ${XPU_XFT_DIR_NAME}
+    ${XPU_XFT_DIR_NAME} && wget ${XPU_XPTI_GET_DEPENCE_URL} && bash
+    get_xpti_dependence.sh ${XPU_XPTI_URL} ${XPU_XPTI_DIR_NAME}
   DOWNLOAD_NO_PROGRESS 1
   UPDATE_COMMAND ""
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
@@ -151,6 +165,12 @@ if(WITH_XPU_XFT)
   set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
 endif()
 
+if(WITH_XPU_XPTI) # opt-in switch; the download step above is not conditioned on it
+  message(STATUS "Compile with XPU XPTI!")
+  add_definitions(-DPADDLE_WITH_XPU_XPTI) # macro made visible to the C++ sources
+  set(XPU_XPTI_LIB "${XPU_LIB_DIR}/${XPU_XPTI_LIB_NAME}")
+endif()
+
 if(WITH_XPU_BKCL AND WITH_XPU_XFT)
   target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
                         ${XPU_XFT_LIB})
@@ -162,6 +182,10 @@ else()
   target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
 endif()
 
+if(WITH_XPU_XPTI) # adds libxpti.so on top of whichever link line was chosen above
+  target_link_libraries(xpulib ${XPU_XPTI_LIB})
+endif()
+
 add_dependencies(xpulib ${XPU_PROJECT})
 
 # Ensure that xpu/api.h can be included without dependency errors.
diff --git a/paddle/fluid/platform/dynload/xpti.cc b/paddle/fluid/platform/dynload/xpti.cc
new file mode 100644
index 00000000000..5d4b45fd07a
--- /dev/null
+++ b/paddle/fluid/platform/dynload/xpti.cc
@@ -0,0 +1,31 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/platform/dynload/xpti.h"
+
+namespace paddle {
+namespace platform {
+namespace dynload {
+
+#define DEFINE_WRAP(__name) DynLoad__##__name __name  // defines the object that xpti.h declares extern
+
+XPTI_ROUTINE_EACH(DEFINE_WRAP);  // routine list macro comes from paddle/phi/backends/dynload/xpti.h
+
+}  // namespace dynload
+}  // namespace platform
+}  // namespace paddle
+
+#endif  // PADDLE_WITH_XPU
diff --git a/paddle/fluid/platform/dynload/xpti.h b/paddle/fluid/platform/dynload/xpti.h
new file mode 100644
index 00000000000..86dc9d0f5ae
--- /dev/null
+++ b/paddle/fluid/platform/dynload/xpti.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include + +#include // NOLINT + +#include "paddle/phi/backends/dynload/xpti.h" + +namespace paddle { +namespace platform { +namespace dynload { + +#define DECLARE_DYNAMIC_LOAD_XPTI_WRAP(__name) \ + using DynLoad__##__name = phi::dynload::DynLoad__##__name; \ + extern DynLoad__##__name __name + +#define XPTI_RAND_ROUTINE_EACH(__macro) \ + __macro(xptiActivityEnable); \ + __macro(xptiActivityDisable); \ + __macro(xptiStartTracing); \ + __macro(xptiStopTracing); \ + __macro(xptiActivityFlushAll); \ + __macro(xptiActivityGetNextRecord); + +XPTI_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_XPTI_WRAP); + +#undef DECLARE_DYNAMIC_LOAD_XPTI_WRAP +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/xpu_tracer.cc b/paddle/fluid/platform/profiler/xpu_tracer.cc new file mode 100644 index 00000000000..5e687b9c745 --- /dev/null +++ b/paddle/fluid/platform/profiler/xpu_tracer.cc @@ -0,0 +1,210 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/profiler/xpu_tracer.h" + +#include +#include + +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/os_info.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/phi/backends/device_manager.h" +#endif + +#define XPTI_CALL(call) \ + do { \ + XPTIResult _status = call; \ + if (_status != XPTI_SUCCESS) { \ + LOG(ERROR) << "Function " << #call << " failed with error " << _status; \ + exit(-1); \ + } \ + } while (0) + +namespace paddle { +namespace platform { + +void XPUTracer::PrepareTracing() { + PADDLE_ENFORCE_EQ( + state_ == TracerState::UNINITED || state_ == TracerState::STOPED, + true, + platform::errors::PreconditionNotMet("XPUTracer must be UNINITED")); +#ifdef PADDLE_WITH_XPU + XPTI_CALL(dynload::xptiActivityEnable()); + VLOG(3) << "enable xpti activity"; +#endif + state_ = TracerState::READY; +} + +void XPUTracer::StartTracing() { + PADDLE_ENFORCE_EQ( + state_ == TracerState::READY, + true, + platform::errors::PreconditionNotMet("Tracer must be READY or STOPPED")); +#ifdef PADDLE_WITH_XPU + XPTI_CALL(dynload::xptiStartTracing()); +#endif + tracing_start_ns_ = PosixInNsec(); + state_ = TracerState::STARTED; +} + +void XPUTracer::StopTracing() { + PADDLE_ENFORCE_EQ( + state_, + TracerState::STARTED, + platform::errors::PreconditionNotMet("Tracer must be STARTED")); +#ifdef PADDLE_WITH_XPU + XPTI_CALL(dynload::xptiStopTracing()); + XPTI_CALL(dynload::xptiActivityDisable()); + VLOG(3) << "disable xpti activity"; +#endif + state_ = TracerState::STOPED; +} + +#ifdef PADDLE_WITH_XPU +void AddApiRecord(const baidu::xpu::xpti::XPTIEventApi* api, + uint64_t start_ns, + TraceEventCollector* collector) { + if (api->start < start_ns) { + VLOG(4) << "xpu event " << api->get_name() << " start " << api->start + << " is before profiler start " << start_ns << ", drop event"; + return; + } + RuntimeTraceEvent event; + 
event.name = api->get_name(); + event.start_ns = api->start; + event.end_ns = api->end; + event.process_id = api->pid; + event.thread_id = api->tid; + event.correlation_id = api->args.token; + + collector->AddRuntimeEvent(std::move(event)); + VLOG(4) << "Add api event " << event.name; +} + +void AddKernelRecord(const baidu::xpu::xpti::XPTIEventKernel* kernel, + uint64_t start_ns, + TraceEventCollector* collector) { + if (kernel->start < start_ns) { + VLOG(4) << "xpu event " << kernel->get_name() << "start " << kernel->start + << "is before profiler start " << start_ns << ", drop event"; + return; + } + DeviceTraceEvent event; + event.name = kernel->get_name(); + event.type = TracerEventType::Kernel; + event.start_ns = kernel->start; + event.end_ns = kernel->end; + event.device_id = kernel->args.board_id; + event.stream_id = kernel->args.stream_id; + event.correlation_id = kernel->args.token; + + collector->AddDeviceEvent(std::move(event)); + VLOG(4) << "Add kernel event " << event.name; +} + +void AddWaitRecord(const baidu::xpu::xpti::XPTIEventWait* wait, + uint64_t start_ns, + TraceEventCollector* collector) { + if (wait->start < start_ns) { + VLOG(4) << "xpu event " << wait->get_name() << "start " << wait->start + << "is before profiler start " << start_ns << ", drop event"; + return; + } + RuntimeTraceEvent event; + event.name = wait->get_name(); + event.start_ns = wait->start; + event.end_ns = wait->end; + event.process_id = wait->pid; + event.thread_id = wait->tid; + + collector->AddRuntimeEvent(std::move(event)); + VLOG(4) << "Add wait event " << event.name; +} + +void AddMemcpyRecord(const baidu::xpu::xpti::XPTIEventMem* memcpy, + uint64_t start_ns, + TraceEventCollector* collector) { + if (memcpy->start < start_ns) { + VLOG(4) << "xpu event " << memcpy->get_name() << "start " << memcpy->start + << "is before profiler start " << start_ns << ", drop event"; + return; + } + RuntimeTraceEvent event; + event.name = memcpy->get_name(); + event.start_ns = 
memcpy->start; + event.end_ns = memcpy->end; + event.process_id = memcpy->pid; + event.thread_id = memcpy->tid; + + collector->AddRuntimeEvent(std::move(event)); + VLOG(4) << "Add memcpy event " << event.name; +} +#endif + +void XPUTracer::CollectTraceData(TraceEventCollector* collector) { + PADDLE_ENFORCE_EQ( + state_, + TracerState::STOPED, + platform::errors::PreconditionNotMet("Tracer must be STOPED")); +#ifdef PADDLE_WITH_XPU + XPTI_CALL(dynload::xptiActivityFlushAll()); + baidu::xpu::xpti::XPTIEvent* record = nullptr; + while (true) { + XPTIResult status = dynload::xptiActivityGetNextRecord(&record); + if (status == XPTI_SUCCESS) { + record->PrintForDebug(); + switch (record->type) { + case XPTI_EVENT_TYPE_API: + AddApiRecord( + reinterpret_cast(record), + tracing_start_ns_, + collector); + break; + case XPTI_EVENT_TYPE_KERNEL: + AddKernelRecord( + reinterpret_cast( + record), + tracing_start_ns_, + collector); + break; + case XPTI_EVENT_TYPE_MEMCPY: + AddMemcpyRecord( + reinterpret_cast(record), + tracing_start_ns_, + collector); + break; + case XPTI_EVENT_TYPE_WAIT: + AddWaitRecord( + reinterpret_cast(record), + tracing_start_ns_, + collector); + break; + default: + break; + } + } else if (status == XPTI_INVALID_DATA) { + // data queue already empty + VLOG(4) << "xpti data queue is empty now, collect trace data done"; + break; + } else { + XPTI_CALL(status); + } + // free XPTIEvent + } +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/xpu_tracer.h b/paddle/fluid/platform/profiler/xpu_tracer.h new file mode 100644 index 00000000000..98163b442c9 --- /dev/null +++ b/paddle/fluid/platform/profiler/xpu_tracer.h @@ -0,0 +1,52 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/platform/dynload/xpti.h" +#include "paddle/fluid/platform/macros.h" +#include "paddle/fluid/platform/profiler/tracer_base.h" + +namespace paddle { +namespace platform { + +class XPUTracer : public TracerBase { + public: + static XPUTracer& GetInstance() { + static XPUTracer instance; + return instance; + } + + void PrepareTracing() override; + + void StartTracing() override; + + void StopTracing() override; + + void CollectTraceData(TraceEventCollector* collector) override; + + XPUTracer() {} + + private: + DISABLE_COPY_AND_ASSIGN(XPUTracer); + + uint64_t tracing_start_ns_ = UINT64_MAX; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/phi/backends/dynload/xpti.cc b/paddle/phi/backends/dynload/xpti.cc new file mode 100644 index 00000000000..cf99344d0fe --- /dev/null +++ b/paddle/phi/backends/dynload/xpti.cc @@ -0,0 +1,32 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef PADDLE_WITH_XPU + +#include "paddle/phi/backends/dynload/xpti.h" + +namespace phi { +namespace dynload { + +std::once_flag xpti_dso_flag; +void *xpti_dso_handle = nullptr; + +#define DEFINE_WRAP(__name) DynLoad__##__name __name + +XPTI_ROUTINE_EACH(DEFINE_WRAP); + +} // namespace dynload +} // namespace phi + +#endif // PADDLE_WITH_XPU diff --git a/paddle/phi/backends/dynload/xpti.h b/paddle/phi/backends/dynload/xpti.h new file mode 100644 index 00000000000..98c81b72950 --- /dev/null +++ b/paddle/phi/backends/dynload/xpti.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU + +#include + +#include // NOLINT + +#include "paddle/phi/backends/dynload/dynamic_loader.h" +#include "paddle/phi/backends/dynload/port.h" + +namespace phi { +namespace dynload { + +extern std::once_flag xpti_dso_flag; +extern void *xpti_dso_handle; + +#define DECLARE_DYNAMIC_LOAD_XPTI_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + XPTIResult operator()(Args... 
args) { \ + using xptiFunc = decltype(&::__name); \ + std::call_once(xpti_dso_flag, []() { \ + xpti_dso_handle = phi::dynload::GetXPTIDsoHandle(); \ + }); \ + static void *p_##__name = dlsym(xpti_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ + extern DynLoad__##__name __name + +#define XPTI_ROUTINE_EACH(__macro) \ + __macro(xptiActivityEnable); \ + __macro(xptiActivityDisable); \ + __macro(xptiStartTracing); \ + __macro(xptiStopTracing); \ + __macro(xptiActivityFlushAll); \ + __macro(xptiActivityGetNextRecord); + +XPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_XPTI_WRAP); + +#undef DECLARE_DYNAMIC_LOAD_XPTI_WRAP +} // namespace dynload +} // namespace phi + +#endif // PADDLE_WITH_XPTI -- GitLab