未验证 提交 82eeda69 编写于 作者: J jameszhang 提交者: GitHub

[kunlun] support xpu runtime profiler (#54685)

* [kunlun] support xpu runtime profiler

* fix cmake error

* add libxpti.so to paddle package

* fix for style check

* sync change in setup.py and python/setup.py.in

* remove libxpti.so from paddle output dir in this PR
上级 eb9d07e5
...@@ -7,10 +7,12 @@ set(XPU_PROJECT "extern_xpu") ...@@ -7,10 +7,12 @@ set(XPU_PROJECT "extern_xpu")
set(XPU_API_LIB_NAME "libxpuapi.so") set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so") set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_XFT_LIB_NAME "libxft.so") set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")
set(XPU_BASE_DATE "20230602") set(XPU_BASE_DATE "20230602")
set(XPU_XCCL_BASE_VERSION "1.0.49.2") set(XPU_XCCL_BASE_VERSION "1.0.49.2")
set(XPU_XFT_BASE_VERSION "latest") set(XPU_XFT_BASE_VERSION "latest")
set(XPU_XPTI_BASE_VERSION "0.0.1")
if(NOT DEFINED XPU_BASE_URL) if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE set(XPU_BASE_URL_WITHOUT_DATE
...@@ -30,6 +32,10 @@ if(NOT XPU_XFT_BASE_URL) ...@@ -30,6 +32,10 @@ if(NOT XPU_XFT_BASE_URL)
) )
endif() endif()
# Base download location of the XPTI (XPU profiling) package. Only set when
# the caller has not already provided XPU_XPTI_BASE_URL (e.g. via -D), so a
# mirror or a pinned internal location can be substituted, matching how the
# XFT base URL is handled.
if(NOT XPU_XPTI_BASE_URL)
  set(XPU_XPTI_BASE_URL
      "https://klx-sdk-release-public.su.bcebos.com/xpti/dev/${XPU_XPTI_BASE_VERSION}"
  )
endif()
if(WITH_XCCL_RDMA) if(WITH_XCCL_RDMA)
set(XPU_XCCL_PREFIX "xccl_rdma") set(XPU_XCCL_PREFIX "xccl_rdma")
else() else()
...@@ -67,6 +73,7 @@ else() ...@@ -67,6 +73,7 @@ else()
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64") set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
endif() endif()
set(XPU_XPTI_DIR_NAME "xpti")
set(XPU_XRE_URL set(XPU_XRE_URL
"${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
...@@ -78,12 +85,18 @@ set(XPU_XCCL_URL ...@@ -78,12 +85,18 @@ set(XPU_XCCL_URL
"${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(XPU_XFT_URL "${XPU_XFT_BASE_URL}/${XPU_XFT_DIR_NAME}.tar.gz") set(XPU_XFT_URL "${XPU_XFT_BASE_URL}/${XPU_XFT_DIR_NAME}.tar.gz")
set(XPU_XPTI_URL
"${XPU_XPTI_BASE_URL}/${XPU_XPTI_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_PACK_DEPENCE_URL set(XPU_PACK_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(XPU_XFT_GET_DEPENCE_URL set(XPU_XFT_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh" "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(XPU_XPTI_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xpti_dependence.sh"
CACHE STRING "" FORCE)
set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu") set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}") set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
...@@ -123,7 +136,8 @@ ExternalProject_Add( ...@@ -123,7 +136,8 @@ ExternalProject_Add(
pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL}
${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} && wget ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} && wget
${XPU_XFT_GET_DEPENCE_URL} && bash get_xft_dependence.sh ${XPU_XFT_URL} ${XPU_XFT_GET_DEPENCE_URL} && bash get_xft_dependence.sh ${XPU_XFT_URL}
${XPU_XFT_DIR_NAME} ${XPU_XFT_DIR_NAME} && wget ${XPU_XPTI_GET_DEPENCE_URL} && bash
get_xpti_dependence.sh ${XPU_XPTI_URL} ${XPU_XPTI_DIR_NAME}
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
...@@ -151,6 +165,12 @@ if(WITH_XPU_XFT) ...@@ -151,6 +165,12 @@ if(WITH_XPU_XFT)
set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}") set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
endif() endif()
# When WITH_XPU_XPTI is enabled: announce it, define the PADDLE_WITH_XPU_XPTI
# preprocessor symbol (directory scope), and record the full path of the XPTI
# shared library in XPU_XPTI_LIB.
# NOTE(review): confirm which preprocessor symbol the XPTI C++ sources are
# actually guarded by, so that it matches the definition added here.
if(WITH_XPU_XPTI)
message(STATUS "Compile with XPU XPTI!")
add_definitions(-DPADDLE_WITH_XPU_XPTI)
set(XPU_XPTI_LIB "${XPU_LIB_DIR}/${XPU_XPTI_LIB_NAME}")
endif()
if(WITH_XPU_BKCL AND WITH_XPU_XFT) if(WITH_XPU_BKCL AND WITH_XPU_XFT)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB} target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
${XPU_XFT_LIB}) ${XPU_XFT_LIB})
...@@ -162,6 +182,10 @@ else() ...@@ -162,6 +182,10 @@ else()
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
endif() endif()
# Additionally link the XPTI library into xpulib when WITH_XPU_XPTI is on.
# The keyword-less signature is kept on purpose: the other
# target_link_libraries(xpulib ...) calls use it too, and the plain and
# keyword signatures must not be mixed on one target.
if(WITH_XPU_XPTI)
target_link_libraries(xpulib ${XPU_XPTI_LIB})
endif()
add_dependencies(xpulib ${XPU_PROJECT}) add_dependencies(xpulib ${XPU_PROJECT})
# Ensure that xpu/api.h can be included without dependency errors. # Ensure that xpu/api.h can be included without dependency errors.
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/dynload/xpti.h"
namespace paddle {
namespace platform {
namespace dynload {
// Defines, for every routine listed by XPTI_ROUTINE_EACH, one object of type
// DynLoad__<name> that is itself named <name>.
// NOTE(review): XPTI_ROUTINE_EACH is not defined in this file; presumably it
// is provided through the included header. DEFINE_WRAP is never #undef'd.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
XPTI_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
#endif // PADDLE_WITH_XPU
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <xpu/xpti.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/xpti.h"
namespace paddle {
namespace platform {
namespace dynload {
// For a routine <name>: aliases phi::dynload::DynLoad__<name> into this
// namespace and declares the extern wrapper object <name> of that type.
#define DECLARE_DYNAMIC_LOAD_XPTI_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
// List of XPTI routines re-exported by this header; applies __macro to each.
// NOTE(review): the "RAND" in the name looks like a leftover from another
// dynload header; this macro is not #undef'd after use.
#define XPTI_RAND_ROUTINE_EACH(__macro) \
__macro(xptiActivityEnable); \
__macro(xptiActivityDisable); \
__macro(xptiStartTracing); \
__macro(xptiStopTracing); \
__macro(xptiActivityFlushAll); \
__macro(xptiActivityGetNextRecord);
// Emit the alias + extern declaration for every listed routine.
XPTI_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_XPTI_WRAP);
#undef DECLARE_DYNAMIC_LOAD_XPTI_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/profiler/xpu_tracer.h"
#include <mutex>
#include <unordered_map>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/os_info.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/device_manager.h"
#endif
// Evaluates `call` once and compares the resulting XPTIResult against
// XPTI_SUCCESS. On any other value it logs the stringified expression together
// with the status at ERROR level and terminates the process via exit(-1).
// Wrapped in do { ... } while (0) so it can be used as a single statement.
#define XPTI_CALL(call) \
do { \
XPTIResult _status = call; \
if (_status != XPTI_SUCCESS) { \
LOG(ERROR) << "Function " << #call << " failed with error " << _status; \
exit(-1); \
} \
} while (0)
namespace paddle {
namespace platform {
// Enables XPTI activity collection and moves the tracer to READY.
//
// Accepted starting states are UNINITED and STOPED; any other state raises
// PreconditionNotMet. The error text now names both accepted states (it
// previously mentioned only UNINITED although STOPED was accepted as well).
void XPUTracer::PrepareTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::UNINITED || state_ == TracerState::STOPED,
      true,
      platform::errors::PreconditionNotMet(
          "XPUTracer must be UNINITED or STOPED"));
#ifdef PADDLE_WITH_XPU
  // XPTI_CALL logs and exits the process if the call does not succeed.
  XPTI_CALL(dynload::xptiActivityEnable());
  VLOG(3) << "enable xpti activity";
#endif
  state_ = TracerState::READY;
}
// Starts XPTI tracing, records the wall-clock start time used later to drop
// events that began before this point, and moves the tracer to STARTED.
//
// The only accepted starting state is READY; the error text now says so (it
// previously also mentioned STOPPED, which the check does not accept).
void XPUTracer::StartTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::READY,
      true,
      platform::errors::PreconditionNotMet("Tracer must be READY"));
#ifdef PADDLE_WITH_XPU
  // XPTI_CALL logs and exits the process if the call does not succeed.
  XPTI_CALL(dynload::xptiStartTracing());
#endif
  tracing_start_ns_ = PosixInNsec();
  state_ = TracerState::STARTED;
}
// Stops XPTI tracing and disables activity collection, then moves the tracer
// to STOPED. Raises PreconditionNotMet unless the tracer is STARTED.
void XPUTracer::StopTracing() {
PADDLE_ENFORCE_EQ(
state_,
TracerState::STARTED,
platform::errors::PreconditionNotMet("Tracer must be STARTED"));
#ifdef PADDLE_WITH_XPU
// Tracing is stopped first, then the activity is disabled; each call goes
// through XPTI_CALL, which logs and exits the process on failure.
XPTI_CALL(dynload::xptiStopTracing());
XPTI_CALL(dynload::xptiActivityDisable());
VLOG(3) << "disable xpti activity";
#endif
state_ = TracerState::STOPED;
}
#ifdef PADDLE_WITH_XPU
// Converts one XPTI API event into a RuntimeTraceEvent and hands it to
// `collector`.
//
// api:       XPTI API record to convert.
// start_ns:  profiler start timestamp; records that started earlier are
//            dropped (with a VLOG(4) message) instead of being collected.
// collector: receiver of the converted event.
void AddApiRecord(const baidu::xpu::xpti::XPTIEventApi* api,
                  uint64_t start_ns,
                  TraceEventCollector* collector) {
  if (api->start < start_ns) {
    VLOG(4) << "xpu event " << api->get_name() << " start " << api->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = api->get_name();
  event.start_ns = api->start;
  event.end_ns = api->end;
  event.process_id = api->pid;
  event.thread_id = api->tid;
  event.correlation_id = api->args.token;
  // Log before handing the event over: once it has been passed through
  // std::move its members must not be read any more.
  VLOG(4) << "Add api event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}
// Converts one XPTI kernel event into a DeviceTraceEvent of type Kernel and
// hands it to `collector`.
//
// kernel:    XPTI kernel record to convert.
// start_ns:  profiler start timestamp; records that started earlier are
//            dropped (with a VLOG(4) message) instead of being collected.
// collector: receiver of the converted event.
void AddKernelRecord(const baidu::xpu::xpti::XPTIEventKernel* kernel,
                     uint64_t start_ns,
                     TraceEventCollector* collector) {
  if (kernel->start < start_ns) {
    // Spacing matches AddApiRecord so the name and numbers do not run together.
    VLOG(4) << "xpu event " << kernel->get_name() << " start " << kernel->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  DeviceTraceEvent event;
  event.name = kernel->get_name();
  event.type = TracerEventType::Kernel;
  event.start_ns = kernel->start;
  event.end_ns = kernel->end;
  event.device_id = kernel->args.board_id;
  event.stream_id = kernel->args.stream_id;
  event.correlation_id = kernel->args.token;
  // Log before handing the event over: once it has been passed through
  // std::move its members must not be read any more.
  VLOG(4) << "Add kernel event " << event.name;
  collector->AddDeviceEvent(std::move(event));
}
// Converts one XPTI wait event into a RuntimeTraceEvent and hands it to
// `collector`. No correlation id is set for wait events.
//
// wait:      XPTI wait record to convert.
// start_ns:  profiler start timestamp; records that started earlier are
//            dropped (with a VLOG(4) message) instead of being collected.
// collector: receiver of the converted event.
void AddWaitRecord(const baidu::xpu::xpti::XPTIEventWait* wait,
                   uint64_t start_ns,
                   TraceEventCollector* collector) {
  if (wait->start < start_ns) {
    // Spacing matches AddApiRecord so the name and numbers do not run together.
    VLOG(4) << "xpu event " << wait->get_name() << " start " << wait->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = wait->get_name();
  event.start_ns = wait->start;
  event.end_ns = wait->end;
  event.process_id = wait->pid;
  event.thread_id = wait->tid;
  // Log before handing the event over: once it has been passed through
  // std::move its members must not be read any more.
  VLOG(4) << "Add wait event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}
// Converts one XPTI memory-copy event into a RuntimeTraceEvent and hands it
// to `collector`. No correlation id is set for memcpy events.
//
// memcpy:    XPTI memory record to convert.
// start_ns:  profiler start timestamp; records that started earlier are
//            dropped (with a VLOG(4) message) instead of being collected.
// collector: receiver of the converted event.
void AddMemcpyRecord(const baidu::xpu::xpti::XPTIEventMem* memcpy,
                     uint64_t start_ns,
                     TraceEventCollector* collector) {
  if (memcpy->start < start_ns) {
    // Spacing matches AddApiRecord so the name and numbers do not run together.
    VLOG(4) << "xpu event " << memcpy->get_name() << " start " << memcpy->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = memcpy->get_name();
  event.start_ns = memcpy->start;
  event.end_ns = memcpy->end;
  event.process_id = memcpy->pid;
  event.thread_id = memcpy->tid;
  // Log before handing the event over: once it has been passed through
  // std::move its members must not be read any more.
  VLOG(4) << "Add memcpy event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}
#endif
// Flushes XPTI and drains its record queue, converting every API, kernel,
// memcpy and wait record into a trace event on `collector`. Records of any
// other type are ignored. Raises PreconditionNotMet unless the tracer is
// STOPED.
void XPUTracer::CollectTraceData(TraceEventCollector* collector) {
  PADDLE_ENFORCE_EQ(
      state_,
      TracerState::STOPED,
      platform::errors::PreconditionNotMet("Tracer must be STOPED"));
#ifdef PADDLE_WITH_XPU
  namespace xpti = baidu::xpu::xpti;

  XPTI_CALL(dynload::xptiActivityFlushAll());

  xpti::XPTIEvent* record = nullptr;
  for (;;) {
    XPTIResult status = dynload::xptiActivityGetNextRecord(&record);

    if (status == XPTI_INVALID_DATA) {
      // The data queue is already empty: nothing left to collect.
      VLOG(4) << "xpti data queue is empty now, collect trace data done";
      break;
    }
    if (status != XPTI_SUCCESS) {
      // Any other failure is reported (and the process exits) via XPTI_CALL.
      XPTI_CALL(status);
      continue;
    }

    record->PrintForDebug();
    if (record->type == XPTI_EVENT_TYPE_API) {
      AddApiRecord(reinterpret_cast<const xpti::XPTIEventApi*>(record),
                   tracing_start_ns_,
                   collector);
    } else if (record->type == XPTI_EVENT_TYPE_KERNEL) {
      AddKernelRecord(reinterpret_cast<const xpti::XPTIEventKernel*>(record),
                      tracing_start_ns_,
                      collector);
    } else if (record->type == XPTI_EVENT_TYPE_MEMCPY) {
      AddMemcpyRecord(reinterpret_cast<const xpti::XPTIEventMem*>(record),
                      tracing_start_ns_,
                      collector);
    } else if (record->type == XPTI_EVENT_TYPE_WAIT) {
      AddWaitRecord(reinterpret_cast<const xpti::XPTIEventWait*>(record),
                    tracing_start_ns_,
                    collector);
    }
    // NOTE(review): nothing here releases `record`; confirm who owns the
    // XPTIEvent returned by xptiActivityGetNextRecord.
  }
#endif
}
} // namespace platform
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "paddle/fluid/platform/dynload/xpti.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/profiler/tracer_base.h"
namespace paddle {
namespace platform {
// TracerBase implementation for XPU devices. The tracing steps are defined
// out of line; this class itself only stores the timestamp taken when tracing
// was started.
class XPUTracer : public TracerBase {
 public:
  XPUTracer() {}

  // Returns a reference to a function-local static instance, which is
  // constructed on the first call.
  static XPUTracer& GetInstance() {
    static XPUTracer singleton;
    return singleton;
  }

  // TracerBase interface.
  void PrepareTracing() override;
  void StartTracing() override;
  void StopTracing() override;
  void CollectTraceData(TraceEventCollector* collector) override;

 private:
  DISABLE_COPY_AND_ASSIGN(XPUTracer);

  // Timestamp (ns) captured when tracing starts; UINT64_MAX until then.
  uint64_t tracing_start_ns_ = UINT64_MAX;
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/dynload/xpti.h"
namespace phi {
namespace dynload {
// Definitions of the once-flag and the library handle declared extern in the
// header; the handle stays nullptr until it is assigned elsewhere.
std::once_flag xpti_dso_flag;
void *xpti_dso_handle = nullptr;
// Defines one DynLoad__<name> object named <name> for every routine listed by
// XPTI_ROUTINE_EACH. DEFINE_WRAP is not #undef'd afterwards.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
XPTI_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace phi
#endif // PADDLE_WITH_XPU
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <xpu/xpti.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/dynamic_loader.h"
#include "paddle/phi/backends/dynload/port.h"
namespace phi {
namespace dynload {
extern std::once_flag xpti_dso_flag;
extern void *xpti_dso_handle;
// Declares, for an XPTI routine <name>, a functor type DynLoad__<name> plus an
// extern object <name> of that type. Calling the object:
//   1. assigns xpti_dso_handle from GetXPTIDsoHandle() exactly once
//      (std::call_once on xpti_dso_flag),
//   2. looks the symbol up with dlsym and keeps the result in a function-local
//      static,
//   3. casts it to the type of the real ::<name> and forwards the arguments.
// NOTE(review): the dlsym result is invoked without a null check, so a
// missing symbol would result in a call through a null pointer.
#define DECLARE_DYNAMIC_LOAD_XPTI_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
XPTIResult operator()(Args... args) { \
using xptiFunc = decltype(&::__name); \
std::call_once(xpti_dso_flag, []() { \
xpti_dso_handle = phi::dynload::GetXPTIDsoHandle(); \
}); \
static void *p_##__name = dlsym(xpti_dso_handle, #__name); \
return reinterpret_cast<xptiFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
// List of XPTI routines that get a dynamic-load wrapper; applies __macro to
// each name. The list macro stays defined after this header (only the
// DECLARE_ macro is #undef'd below).
#define XPTI_ROUTINE_EACH(__macro) \
__macro(xptiActivityEnable); \
__macro(xptiActivityDisable); \
__macro(xptiStartTracing); \
__macro(xptiStopTracing); \
__macro(xptiActivityFlushAll); \
__macro(xptiActivityGetNextRecord);
// Emit the wrapper struct + extern declaration for every listed routine.
XPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_XPTI_WRAP);
#undef DECLARE_DYNAMIC_LOAD_XPTI_WRAP
} // namespace dynload
} // namespace phi
#endif // PADDLE_WITH_XPU
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册