未验证 提交 d67d74cc 编写于 作者: H houj04 提交者: GitHub

[XPU] update log for bkcl function calls. (#53609)

* [XPU] update log for bkcl function calls.

* minor update

* revert unnecessary modifications.
上级 0d45ac73
...@@ -115,7 +115,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Recv( ...@@ -115,7 +115,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Recv(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_recv"; VLOG(3) << "calling bkcl_recv"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", src_rank: " << src_rank << ", numel: " << output->numel()
<< ", dtype: " << output->type() << ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = bkcl_recv(comm, int r = bkcl_recv(comm,
output->data(), output->data(),
output->numel(), output->numel(),
...@@ -148,7 +154,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Send( ...@@ -148,7 +154,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Send(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_send"; VLOG(3) << "calling bkcl_send"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", dst_rank: " << dst_rank
<< ", input numel: " << input.numel()
<< ", dtype: " << input.type() << ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = bkcl_send(comm, int r = bkcl_send(comm,
input.data(), input.data(),
input.numel(), input.numel(),
...@@ -276,7 +289,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce( ...@@ -276,7 +289,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_reduce"; VLOG(3) << "calling bkcl_all_reduce"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << input.numel() << ", dtype: " << input.type()
<< ", reduce_type: " << ToBKCLRedType(opts.reduce_op)
<< ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = int r =
bkcl_all_reduce(comm, bkcl_all_reduce(comm,
input.data(), input.data(),
...@@ -307,7 +327,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast( ...@@ -307,7 +327,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast(
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
int root = opts.source_rank + opts.source_root; int root = opts.source_rank + opts.source_root;
VLOG(3) << "bkcl_broadcast"; VLOG(3) << "calling bkcl_broadcast"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", root: " << root << ", numel: " << input.numel()
<< ", dtype: " << input.type() << ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = int r =
bkcl_broadcast(comm, bkcl_broadcast(comm,
input.data(), input.data(),
...@@ -346,7 +372,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather( ...@@ -346,7 +372,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_gather"; VLOG(3) << "calling bkcl_all_gather"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << in_tensor_maybe_partial.numel()
<< ", dtype: " << input.type() << ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = int r =
bkcl_all_gather(comm, bkcl_all_gather(comm,
in_tensor_maybe_partial.data(), in_tensor_maybe_partial.data(),
...@@ -375,7 +407,15 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Reduce( ...@@ -375,7 +407,15 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Reduce(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_reduce"; VLOG(3) << "calling bkcl_reduce"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", root: " << opts.root_rank << ", numel: " << input.numel()
<< ", dtype: " << input.type()
<< ", reduce_type: " << ToBKCLRedType(opts.reduce_op)
<< ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = bkcl_reduce(comm, int r = bkcl_reduce(comm,
input.data(), input.data(),
output->data(), output->data(),
...@@ -405,7 +445,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::ReduceScatter( ...@@ -405,7 +445,14 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::ReduceScatter(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_reduce_scatter"; VLOG(3) << "calling bkcl_reduce_scatter"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << output->numel() << ", dtype: " << input.type()
<< ", reduce_type: " << ToBKCLRedType(opts.reduce_op)
<< ", sync_op: " << sync_op
<< ", use_calc_stream: " << use_calc_stream;
int r = bkcl_reduce_scatter( int r = bkcl_reduce_scatter(
comm, comm,
input.data(), input.data(),
...@@ -491,8 +538,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce( ...@@ -491,8 +538,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_reduce"; VLOG(3) << "calling bkcl_all_reduce"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << input.numel() << ", dtype: " << input.type()
<< ", reduce_type: " << ToBKCLRedType(opts.reduce_op)
<< ", sync_op: " << true << ", use_calc_stream: " << false;
int r = int r =
bkcl_all_reduce(comm, bkcl_all_reduce(comm,
input.data(), input.data(),
...@@ -535,7 +587,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce( ...@@ -535,7 +587,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllReduce(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_reduce"; VLOG(3) << "calling bkcl_all_reduce"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << input.numel() << ", dtype: " << input.type()
<< ", reduce_type: " << ToBKCLRedType(opts.reduce_op)
<< ", sync_op: " << sync_op << ", use_calc_stream: " << false;
int r = int r =
bkcl_all_reduce(comm, bkcl_all_reduce(comm,
input.data(), input.data(),
...@@ -580,7 +638,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast( ...@@ -580,7 +638,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast(
const XPUStream& stream) { const XPUStream& stream) {
const auto root = const auto root =
opts.source_rank * in_tensors.size() + opts.source_root; opts.source_rank * in_tensors.size() + opts.source_root;
VLOG(3) << "bkcl_broadcast"; VLOG(3) << "calling bkcl_broadcast"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", root: " << root << ", numel: " << input.numel()
<< ", dtype: " << input.type() << ", sync_op: " << true
<< ", use_calc_stream: " << false;
int r = int r =
bkcl_broadcast(comm, bkcl_broadcast(comm,
input.data(), input.data(),
...@@ -626,7 +690,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast( ...@@ -626,7 +690,13 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Broadcast(
const XPUStream& stream) { const XPUStream& stream) {
const auto root = const auto root =
opts.source_rank * in_tensors.size() + opts.source_root; opts.source_rank * in_tensors.size() + opts.source_root;
VLOG(3) << "bkcl_broadcast"; VLOG(3) << "calling bkcl_broadcast"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", root: " << root << ", numel: " << input.numel()
<< ", dtype: " << input.type() << ", sync_op: " << sync_op
<< ", use_calc_stream: " << false;
int r = int r =
bkcl_broadcast(comm, bkcl_broadcast(comm,
input.data(), input.data(),
...@@ -671,7 +741,12 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather( ...@@ -671,7 +741,12 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_gather"; VLOG(3) << "calling bkcl_all_gather"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << input.numel() << ", dtype: " << input.type()
<< ", sync_op: " << true << ", use_calc_stream: " << false;
int r = int r =
bkcl_all_gather(comm, bkcl_all_gather(comm,
input.data(), input.data(),
...@@ -712,7 +787,12 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather( ...@@ -712,7 +787,12 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather(
const phi::DenseTensor& input, const phi::DenseTensor& input,
BKCLContext_t comm, BKCLContext_t comm,
const XPUStream& stream) { const XPUStream& stream) {
VLOG(3) << "bkcl_all_gather"; VLOG(3) << "calling bkcl_all_gather"
<< ", rank_id: " << platform::GetBKCLRankID(comm)
<< ", dev_id: " << platform::GetBKCLDevID(comm)
<< ", nranks: " << platform::GetBKCLNRanks(comm)
<< ", numel: " << input.numel() << ", dtype: " << input.type()
<< ", sync_op: " << sync_op << ", use_calc_stream: " << false;
int r = int r =
bkcl_all_gather(comm, bkcl_all_gather(comm,
input.data(), input.data(),
......
...@@ -66,7 +66,6 @@ class FusedGemmEpilogueXPUKernel : public framework::OpKernel<T> { ...@@ -66,7 +66,6 @@ class FusedGemmEpilogueXPUKernel : public framework::OpKernel<T> {
phi::XpuFcInfo fc_info; phi::XpuFcInfo fc_info;
phi::GetFCInfo(x_mat_dims, y->dims(), trans_x, trans_y, &fc_info); phi::GetFCInfo(x_mat_dims, y->dims(), trans_x, trans_y, &fc_info);
VLOG(0) << "FusedGemmEpilogueXPUKernel 000";
xpu::Context* xpu_ctx = dev_ctx.x_context(); xpu::Context* xpu_ctx = dev_ctx.x_context();
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>()); const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
......
...@@ -62,6 +62,18 @@ inline BKCLDataType ToBKCLDataType(framework::proto::VarType::Type type) { ...@@ -62,6 +62,18 @@ inline BKCLDataType ToBKCLDataType(framework::proto::VarType::Type type) {
} }
} }
// Returns the int stored at index 0 of the memory that `comm` points to.
// NOTE(review): presumably this slot holds the rank id (per the function
// name); it depends on the internal layout of the BKCL context, which is not
// visible here -- confirm against the BKCL library version in use.
inline int GetBKCLRankID(BKCLContext_t comm) {
  const int *ctx_words = reinterpret_cast<const int *>(comm);
  return ctx_words[0];
}
// Returns the int stored at index 1 of the memory that `comm` points to.
// NOTE(review): presumably this slot holds the device id (per the function
// name); it depends on the internal layout of the BKCL context, which is not
// visible here -- confirm against the BKCL library version in use.
inline int GetBKCLDevID(BKCLContext_t comm) {
  const int *ctx_words = reinterpret_cast<const int *>(comm);
  return ctx_words[1];
}
// Returns the int stored at index 2 of the memory that `comm` points to.
// NOTE(review): presumably this slot holds the number of ranks (per the
// function name); it depends on the internal layout of the BKCL context,
// which is not visible here -- confirm against the BKCL library version in
// use.
inline int GetBKCLNRanks(BKCLContext_t comm) {
  const int *ctx_words = reinterpret_cast<const int *>(comm);
  return ctx_words[2];
}
class BKCLGroupGuard { class BKCLGroupGuard {
public: public:
static std::mutex &BKCLMutex() { static std::mutex &BKCLMutex() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册