Unverified commit 40f3f4f0, authored by Wen Sun, committed by GitHub

refactor: rename files (#49117)

Parent: e77d1cac
......@@ -3,13 +3,13 @@ cc_library(
SRCS ProcessGroup.cc
DEPS dense_tensor)
cc_library(
- processgroup_stream
- SRCS ProcessGroupStream.cc
+ process_group_stream
+ SRCS process_group_stream.cc
DEPS dense_tensor)
cc_library(
eager_reducer
SRCS reducer.cc
- DEPS eager_api processgroup processgroup_stream phi_api string_helper)
+ DEPS eager_api processgroup process_group_stream phi_api string_helper)
if(WITH_DISTRIBUTE)
cc_library(
......@@ -20,10 +20,10 @@ endif()
if(WITH_NCCL OR WITH_RCCL)
cc_library(
- processgroup_nccl
- SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc check.cc
+ process_group_nccl
+ SRCS process_group_nccl.cc nccl_tools.cc Common.cc check.cc
DEPS processgroup
- processgroup_stream
+ process_group_stream
place
enforce
collective_helper
......
......@@ -97,6 +97,17 @@ class ProcessGroup {
GetBackendName()));
}
+ virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+ phi::DenseTensor* out_tensor,
+ const phi::DenseTensor& in_tensor,
+ bool sync_op) {
+ return AllGather(out_tensor,
+ in_tensor,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates the whole tensor
+ sync_op);
+ }
virtual std::shared_ptr<ProcessGroup::Task> AllGather(
phi::DenseTensor* out_tensor,
const phi::DenseTensor& in_tensor,
......@@ -175,6 +186,16 @@ class ProcessGroup {
GetBackendName()));
}
+ virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+ int src_rank,
+ bool sync_op) {
+ return Recv(tensor,
+ src_rank,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates the whole tensor
+ sync_op);
+ }
virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
int src_rank,
int64_t offset,
......@@ -185,6 +206,15 @@ class ProcessGroup {
GetBackendName()));
}
+ virtual std::shared_ptr<ProcessGroup::Task> Send(
+ const phi::DenseTensor& tensor, int dst_rank, bool sync_op) {
+ return Send(tensor,
+ dst_rank,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates the whole tensor
+ sync_op);
+ }
virtual std::shared_ptr<ProcessGroup::Task> Send(
const phi::DenseTensor& tensor,
int dst_rank,
......
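The overloads added above default offset to 0 and numel to -1 (the whole tensor) and forward to the existing full-signature virtuals, so call sites no longer need to spell those arguments out. A minimal sketch of a hypothetical call site follows; pg, out, and in are assumed objects, not part of this diff:

// Hypothetical caller; assumes a constructed ProcessGroup subclass instance `pg`
// and two prepared phi::DenseTensor objects `out` and `in`.
// Before this change, the whole-tensor defaults had to be written out:
//   pg->AllGather(&out, in, /*offset*/ 0, /*numel*/ -1, /*sync_op*/ true);
// With the new overload, the base class fills in the defaults:
auto task = pg->AllGather(&out, in, /*sync_op*/ true);
task->Wait();  // block until the collective has finished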
......@@ -20,7 +20,7 @@
#include <string>
#include <unordered_map>
#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
#include "paddle/fluid/distributed/collective/process_group_stream.h"
#include "paddle/fluid/distributed/store/store.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/enforce.h"
......
......@@ -14,7 +14,7 @@
#include "paddle/fluid/distributed/collective/check.h"
#include "paddle/fluid/distributed/collective/NCCLTools.h"
#include "paddle/fluid/distributed/collective/nccl_tools.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/dense_tensor.h"
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/collective/NCCLTools.h"
#include "paddle/fluid/distributed/collective/nccl_tools.h"
#include "paddle/fluid/platform/enforce.h"
......
......@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#include "paddle/fluid/distributed/collective/Common.h"
#include "paddle/fluid/distributed/collective/NCCLTools.h"
#include "paddle/fluid/distributed/collective/check.h"
#include "paddle/fluid/distributed/collective/nccl_tools.h"
#include "paddle/fluid/distributed/collective/utils.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/fluid/platform/place.h"
......
......@@ -20,7 +20,7 @@
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
#include "paddle/fluid/distributed/collective/process_group_stream.h"
#include "paddle/fluid/distributed/store/store.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_event.h"
......@@ -29,7 +29,7 @@
#include "paddle/phi/core/device_context.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/NCCLTools.h"
#include "paddle/fluid/distributed/collective/nccl_tools.h"
#endif
#ifdef PADDLE_WITH_RCCL
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
#include "paddle/fluid/distributed/collective/process_group_stream.h"
namespace paddle {
namespace distributed {
......@@ -40,6 +40,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
/*use_calc_stream*/ false);
}
+ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
+ phi::DenseTensor* out_tensor,
+ const phi::DenseTensor& in_tensor,
+ bool sync_op,
+ bool use_calc_stream) {
+ return AllGather(out_tensor,
+ in_tensor,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates the whole tensor
+ sync_op,
+ use_calc_stream);
+ }
std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
phi::DenseTensor* out_tensor,
const phi::DenseTensor& in_tensor,
......@@ -200,6 +213,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
/*use_calc_stream*/ false);
}
+ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
+ phi::DenseTensor* tensor,
+ int src_rank,
+ bool sync_op,
+ bool use_calc_stream) {
+ return Recv(tensor,
+ src_rank,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates sending the whole tensor
+ sync_op,
+ use_calc_stream);
+ }
std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
phi::DenseTensor* tensor,
int src_rank,
......@@ -225,6 +251,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
/*use_calc_stream*/ false);
}
+ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
+ const phi::DenseTensor& tensor,
+ int dst_rank,
+ bool sync_op,
+ bool use_calc_stream) {
+ return Send(tensor,
+ dst_rank,
+ /*offset*/ 0,
+ /*numel*/ -1, // -1 indicates receiving the whole tensor
+ sync_op,
+ use_calc_stream);
+ }
std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
const phi::DenseTensor& tensor,
int dst_rank,
......
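The stream-aware overloads above follow the same delegation pattern and add a use_calc_stream flag, which runs the collective on the calculation stream rather than the dedicated communication stream. A hedged sketch of a hypothetical call site; pg_stream, out, and in are assumed objects, not part of this diff:

// Hypothetical caller; assumes pg_stream points to a ProcessGroupStream
// subclass and out/in are prepared phi::DenseTensor objects.
// use_calc_stream = true launches the collective on the calculation stream,
// so no extra synchronization with computation kernels is inserted.
auto task = pg_stream->AllGather(&out, in,
                                 /*sync_op*/ true,
                                 /*use_calc_stream*/ true);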
......@@ -69,6 +69,12 @@ class ProcessGroupStream : public ProcessGroup {
int64_t numel,
bool sync_op) override;
+ virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+ phi::DenseTensor* out_tensor,
+ const phi::DenseTensor& in_tensor,
+ bool sync_op,
+ bool use_calc_stream);
virtual std::shared_ptr<ProcessGroup::Task> AllGather(
phi::DenseTensor* out_tensor,
const phi::DenseTensor& in_tensor,
......@@ -161,6 +167,11 @@ class ProcessGroupStream : public ProcessGroup {
int64_t numel,
bool sync_op) override;
+ virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+ int src_rank,
+ bool sync_op,
+ bool use_calc_stream);
virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
int src_rank,
int64_t offset,
......@@ -174,6 +185,11 @@ class ProcessGroupStream : public ProcessGroup {
int64_t numel,
bool sync_op) override;
+ std::shared_ptr<ProcessGroup::Task> Send(const phi::DenseTensor& tensor,
+ int dst_rank,
+ bool sync_op,
+ bool use_calc_stream);
virtual std::shared_ptr<ProcessGroup::Task> Send(
const phi::DenseTensor& tensor,
int dst_rank,
......
......@@ -30,7 +30,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
......
......@@ -23,7 +23,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
......
......@@ -157,7 +157,7 @@ endif()
if(WITH_PYTHON)
set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
if(WITH_NCCL OR WITH_RCCL)
- set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
+ set(PYBIND_DEPS ${PYBIND_DEPS} process_group_nccl)
endif()
if(WITH_XPU_BKCL)
set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_bkcl)
......
......@@ -22,8 +22,8 @@ limitations under the License. */
#endif
#include "paddle/fluid/distributed/collective/ProcessGroup.h"
#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
#include "paddle/fluid/distributed/collective/Types.h"
#include "paddle/fluid/distributed/collective/process_group_stream.h"
#include "paddle/fluid/distributed/collective/reducer.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
......@@ -34,7 +34,7 @@ limitations under the License. */
#include "paddle/phi/api/all.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#endif
#if defined(PADDLE_WITH_MPI)
......@@ -169,9 +169,7 @@ void BindDistributed(py::module *m) {
auto p_dense =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
auto out_dense = *p_dense;
- // numel == -1 indicates sending the whole tensor
- return self.Send(
- out_dense, dst, /*offset*/ 0, /*numel*/ -1, sync_op);
+ return self.Send(out_dense, dst, sync_op);
},
py::arg("tensor"),
py::arg("dst"),
......@@ -215,9 +213,7 @@ void BindDistributed(py::module *m) {
auto p_dense =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
auto *in_dense = p_dense.get();
- // numel == -1 indicates receiving the whole tensor
- return self.Recv(
- in_dense, src, /*offset*/ 0, /*numel*/ -1, sync_op);
+ return self.Recv(in_dense, src, sync_op);
},
py::arg("tensor"),
py::arg("src"),
......@@ -270,11 +266,7 @@ void BindDistributed(py::module *m) {
auto in_dense = *p_in_tensor;
auto *dev_ctx = self.GetDeviceContext(in_tensor.place());
- auto task = self.AllGather(out_dense,
- in_dense,
- /*offset*/ 0,
- /*numel*/ -1,
- sync_op);
+ auto task = self.AllGather(out_dense, in_dense, sync_op);
SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
task->UpdateWaitChain(*dev_ctx);
return task;
......@@ -300,11 +292,7 @@ void BindDistributed(py::module *m) {
in_tensor.impl());
auto in_dense = *p_in_tensor;
- return self.AllGather(out_dense,
- in_dense,
- /*offset*/ 0,
- /*numel*/ -1,
- sync_op);
+ return self.AllGather(out_dense, in_dense, sync_op);
},
py::arg("out"),
py::arg("in"),
......@@ -771,8 +759,6 @@ void BindDistributed(py::module *m) {
auto *dev_ctx = self.GetDeviceContext(in_tensor.place(), true);
auto task = self.AllGather(out_dense,
in_dense,
- /*offset*/ 0,
- /*numel*/ -1,
/*sync_op*/ true,
/*use_calc_stream*/ true);
SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
......@@ -799,8 +785,6 @@ void BindDistributed(py::module *m) {
return self.AllGather(out_dense,
in_dense,
- /*offset*/ 0,
- /*numel*/ -1,
/*sync_op*/ true,
/*use_calc_stream*/ true);
},
......@@ -1127,11 +1111,8 @@ void BindDistributed(py::module *m) {
auto p_dense =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
auto out_dense = *p_dense;
- // numel == -1 indicates sending the whole tensor
return self.Send(out_dense,
dst,
- /*offset*/ 0,
- /*numel*/ -1,
/*sync_op*/ true,
/*use_calc_stream*/ true);
},
......@@ -1177,11 +1158,8 @@ void BindDistributed(py::module *m) {
auto p_dense =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
auto *in_dense = p_dense.get();
- // numel == -1 indicates receiving the whole tensor
return self.Recv(in_dense,
src,
- /*offset*/ 0,
- /*numel*/ -1,
/*sync_op*/ true,
/*use_calc_stream*/ true);
},
......
......@@ -67,7 +67,7 @@ endif()
set(COMM_UTILS_DEPS processgroup)
if(WITH_NCCL OR WITH_RCCL)
- set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_nccl)
+ set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} process_group_nccl)
endif()
if(WITH_CUSTOM_DEVICE)
set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_custom)
......
......@@ -15,7 +15,7 @@
#include "paddle/fluid/distributed/collective/ProcessGroup.h"
#include "paddle/phi/backends/c_comm_lib.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#endif
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
#include "paddle/fluid/distributed/collective/ProcessGroupCustom.h"
......
......@@ -80,7 +80,7 @@ set(COMMON_KERNEL_DEPS
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup)
if(WITH_NCCL OR WITH_RCCL)
- set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_nccl)
+ set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group_nccl)
endif()
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_comm_utils)
if(WITH_CUDNN_FRONTEND)
......
......@@ -28,7 +28,7 @@ namespace cub = hipcub;
#endif
#include "paddle/fluid/distributed/collective/ProcessGroup.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#endif
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
......