From 40f3f4f00b856b017a522cefada93d63cca32ebf Mon Sep 17 00:00:00 2001
From: Wen Sun <35923278+HermitSun@users.noreply.github.com>
Date: Fri, 16 Dec 2022 22:08:09 +0800
Subject: [PATCH] refactor: rename files (#49117)

---
 .../distributed/collective/CMakeLists.txt     | 12 +++---
 .../distributed/collective/ProcessGroup.h     | 30 ++++++++++++++
 .../distributed/collective/ProcessGroupBKCL.h |  2 +-
 paddle/fluid/distributed/collective/check.cc  |  2 +-
 .../{NCCLTools.cc => nccl_tools.cc}           |  2 +-
 .../collective/{NCCLTools.h => nccl_tools.h}  |  0
 ...cessGroupNCCL.cc => process_group_nccl.cc} |  4 +-
 ...rocessGroupNCCL.h => process_group_nccl.h} |  4 +-
 ...GroupStream.cc => process_group_stream.cc} | 41 ++++++++++++++++++-
 ...ssGroupStream.h => process_group_stream.h} | 16 ++++++++
 .../operators/fused/fused_attention_op.cu     |  2 +-
 .../operators/fused/fused_feedforward_op.cu   |  2 +-
 paddle/fluid/pybind/CMakeLists.txt            |  2 +-
 paddle/fluid/pybind/distributed_py.cc         | 34 +++-------------
 paddle/phi/backends/CMakeLists.txt            |  2 +-
 .../phi/backends/processgroup_comm_utils.cc   |  2 +-
 paddle/phi/kernels/CMakeLists.txt             |  2 +-
 .../phi/kernels/gpu/sync_batch_norm_utils.h   |  2 +-
 18 files changed, 112 insertions(+), 49 deletions(-)
 rename paddle/fluid/distributed/collective/{NCCLTools.cc => nccl_tools.cc} (96%)
 rename paddle/fluid/distributed/collective/{NCCLTools.h => nccl_tools.h} (100%)
 rename paddle/fluid/distributed/collective/{ProcessGroupNCCL.cc => process_group_nccl.cc} (99%)
 rename paddle/fluid/distributed/collective/{ProcessGroupNCCL.h => process_group_nccl.h} (98%)
 rename paddle/fluid/distributed/collective/{ProcessGroupStream.cc => process_group_stream.cc} (86%)
 rename paddle/fluid/distributed/collective/{ProcessGroupStream.h => process_group_stream.h} (90%)

diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt
index 85efa52c319..de9059228a6 100644
--- a/paddle/fluid/distributed/collective/CMakeLists.txt
+++ b/paddle/fluid/distributed/collective/CMakeLists.txt
@@ -3,13 +3,13 @@ cc_library(
   SRCS ProcessGroup.cc
   DEPS dense_tensor)
 cc_library(
-  processgroup_stream
-  SRCS ProcessGroupStream.cc
+  process_group_stream
+  SRCS process_group_stream.cc
   DEPS dense_tensor)
 cc_library(
   eager_reducer
   SRCS reducer.cc
-  DEPS eager_api processgroup processgroup_stream phi_api string_helper)
+  DEPS eager_api processgroup process_group_stream phi_api string_helper)
 
 if(WITH_DISTRIBUTE)
   cc_library(
@@ -20,10 +20,10 @@ endif()
 
 if(WITH_NCCL OR WITH_RCCL)
   cc_library(
-    processgroup_nccl
-    SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc check.cc
+    process_group_nccl
+    SRCS process_group_nccl.cc nccl_tools.cc Common.cc check.cc
     DEPS processgroup
-         processgroup_stream
+         process_group_stream
          place
          enforce
          collective_helper
diff --git a/paddle/fluid/distributed/collective/ProcessGroup.h b/paddle/fluid/distributed/collective/ProcessGroup.h
index 7abecd36e3d..3792e8bc835 100644
--- a/paddle/fluid/distributed/collective/ProcessGroup.h
+++ b/paddle/fluid/distributed/collective/ProcessGroup.h
@@ -97,6 +97,17 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+      phi::DenseTensor* out_tensor,
+      const phi::DenseTensor& in_tensor,
+      bool sync_op) {
+    return AllGather(out_tensor,
+                     in_tensor,
+                     /*offset*/ 0,
+                     /*numel*/ -1,  // -1 indicates the whole tensor
+                     sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
       phi::DenseTensor* out_tensor,
       const phi::DenseTensor& in_tensor,
@@ -175,6 +186,16 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+                                                   int src_rank,
+                                                   bool sync_op) {
+    return Recv(tensor,
+                src_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
                                                    int src_rank,
                                                    int64_t offset,
@@ -185,6 +206,15 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Send(
+      const phi::DenseTensor& tensor, int dst_rank, bool sync_op) {
+    return Send(tensor,
+                dst_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Send(
       const phi::DenseTensor& tensor,
       int dst_rank,
diff --git a/paddle/fluid/distributed/collective/ProcessGroupBKCL.h b/paddle/fluid/distributed/collective/ProcessGroupBKCL.h
index 79d97609d92..822f690cb80 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupBKCL.h
+++ b/paddle/fluid/distributed/collective/ProcessGroupBKCL.h
@@ -20,7 +20,7 @@
 #include
 #include
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/store/store.h"
 #include "paddle/fluid/platform/device/xpu/xpu_header.h"
 #include "paddle/fluid/platform/enforce.h"
diff --git a/paddle/fluid/distributed/collective/check.cc b/paddle/fluid/distributed/collective/check.cc
index 9a2ca064024..151d7f35749 100644
--- a/paddle/fluid/distributed/collective/check.cc
+++ b/paddle/fluid/distributed/collective/check.cc
@@ -14,7 +14,7 @@
 
 #include "paddle/fluid/distributed/collective/check.h"
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/dense_tensor.h"
diff --git a/paddle/fluid/distributed/collective/NCCLTools.cc b/paddle/fluid/distributed/collective/nccl_tools.cc
similarity index 96%
rename from paddle/fluid/distributed/collective/NCCLTools.cc
rename to paddle/fluid/distributed/collective/nccl_tools.cc
index 47c0f547ee7..ffb51d706d9 100644
--- a/paddle/fluid/distributed/collective/NCCLTools.cc
+++ b/paddle/fluid/distributed/collective/nccl_tools.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 
 #include "paddle/fluid/platform/enforce.h"
 
diff --git a/paddle/fluid/distributed/collective/NCCLTools.h b/paddle/fluid/distributed/collective/nccl_tools.h
similarity index 100%
rename from paddle/fluid/distributed/collective/NCCLTools.h
rename to paddle/fluid/distributed/collective/nccl_tools.h
diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/process_group_nccl.cc
similarity index 99%
rename from paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
rename to paddle/fluid/distributed/collective/process_group_nccl.cc
index 13de2625a6e..0859708f92c 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
+++ b/paddle/fluid/distributed/collective/process_group_nccl.cc
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/distributed/collective/process_group_nccl.h" #include "paddle/fluid/distributed/collective/Common.h" -#include "paddle/fluid/distributed/collective/NCCLTools.h" #include "paddle/fluid/distributed/collective/check.h" +#include "paddle/fluid/distributed/collective/nccl_tools.h" #include "paddle/fluid/distributed/collective/utils.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h b/paddle/fluid/distributed/collective/process_group_nccl.h similarity index 98% rename from paddle/fluid/distributed/collective/ProcessGroupNCCL.h rename to paddle/fluid/distributed/collective/process_group_nccl.h index 3ce77297f56..816a0d2ec90 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h +++ b/paddle/fluid/distributed/collective/process_group_nccl.h @@ -20,7 +20,7 @@ #include #include -#include "paddle/fluid/distributed/collective/ProcessGroupStream.h" +#include "paddle/fluid/distributed/collective/process_group_stream.h" #include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_event.h" @@ -29,7 +29,7 @@ #include "paddle/phi/core/device_context.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/distributed/collective/NCCLTools.h" +#include "paddle/fluid/distributed/collective/nccl_tools.h" #endif #ifdef PADDLE_WITH_RCCL diff --git a/paddle/fluid/distributed/collective/ProcessGroupStream.cc b/paddle/fluid/distributed/collective/process_group_stream.cc similarity index 86% rename from paddle/fluid/distributed/collective/ProcessGroupStream.cc rename to paddle/fluid/distributed/collective/process_group_stream.cc index e1ee425f3f8..2b69cf51fe6 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupStream.cc +++ b/paddle/fluid/distributed/collective/process_group_stream.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h" +#include "paddle/fluid/distributed/collective/process_group_stream.h" namespace paddle { namespace distributed { @@ -40,6 +40,19 @@ std::shared_ptr ProcessGroupStream::AllGather( /*use_calc_stream*/ false); } +std::shared_ptr ProcessGroupStream::AllGather( + phi::DenseTensor* out_tensor, + const phi::DenseTensor& in_tensor, + bool sync_op, + bool use_calc_stream) { + return AllGather(out_tensor, + in_tensor, + /*offset*/ 0, + /*numel*/ -1, // -1 indicates the whole tensor + sync_op, + use_calc_stream); +} + std::shared_ptr ProcessGroupStream::AllGather( phi::DenseTensor* out_tensor, const phi::DenseTensor& in_tensor, @@ -200,6 +213,19 @@ std::shared_ptr ProcessGroupStream::Recv( /*use_calc_stream*/ false); } +std::shared_ptr ProcessGroupStream::Recv( + phi::DenseTensor* tensor, + int src_rank, + bool sync_op, + bool use_calc_stream) { + return Recv(tensor, + src_rank, + /*offset*/ 0, + /*numel*/ -1, // -1 indicates sending the whole tensor + sync_op, + use_calc_stream); +} + std::shared_ptr ProcessGroupStream::Recv( phi::DenseTensor* tensor, int src_rank, @@ -225,6 +251,19 @@ std::shared_ptr ProcessGroupStream::Send( /*use_calc_stream*/ false); } +std::shared_ptr ProcessGroupStream::Send( + const phi::DenseTensor& tensor, + int dst_rank, + bool sync_op, + bool use_calc_stream) { + return Send(tensor, + dst_rank, + /*offset*/ 0, + /*numel*/ -1, // -1 indicates receiving the whole tensor + sync_op, + use_calc_stream); +} + std::shared_ptr ProcessGroupStream::Send( const phi::DenseTensor& tensor, int dst_rank, diff --git a/paddle/fluid/distributed/collective/ProcessGroupStream.h b/paddle/fluid/distributed/collective/process_group_stream.h similarity index 90% rename from paddle/fluid/distributed/collective/ProcessGroupStream.h rename to paddle/fluid/distributed/collective/process_group_stream.h index 4ad75be3658..d48ff0f24f8 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupStream.h +++ b/paddle/fluid/distributed/collective/process_group_stream.h @@ -69,6 +69,12 @@ class ProcessGroupStream : public ProcessGroup { int64_t numel, bool sync_op) override; + virtual std::shared_ptr AllGather( + phi::DenseTensor* out_tensor, + const phi::DenseTensor& in_tensor, + bool sync_op, + bool use_calc_stream); + virtual std::shared_ptr AllGather( phi::DenseTensor* out_tensor, const phi::DenseTensor& in_tensor, @@ -161,6 +167,11 @@ class ProcessGroupStream : public ProcessGroup { int64_t numel, bool sync_op) override; + virtual std::shared_ptr Recv(phi::DenseTensor* tensor, + int src_rank, + bool sync_op, + bool use_calc_stream); + virtual std::shared_ptr Recv(phi::DenseTensor* tensor, int src_rank, int64_t offset, @@ -174,6 +185,11 @@ class ProcessGroupStream : public ProcessGroup { int64_t numel, bool sync_op) override; + std::shared_ptr Send(const phi::DenseTensor& tensor, + int dst_rank, + bool sync_op, + bool use_calc_stream); + virtual std::shared_ptr Send( const phi::DenseTensor& tensor, int dst_rank, diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index 67ee877f72c..559a2afb85f 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -30,7 +30,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/distributed/collective/process_group_nccl.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu index 28a9cb167e0..925ec7d2060 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cu +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu @@ -23,7 +23,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/elementwise_functor.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/distributed/collective/process_group_nccl.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 973ef8a4a79..37e085b82bc 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -157,7 +157,7 @@ endif() if(WITH_PYTHON) set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) if(WITH_NCCL OR WITH_RCCL) - set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl) + set(PYBIND_DEPS ${PYBIND_DEPS} process_group_nccl) endif() if(WITH_XPU_BKCL) set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_bkcl) diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index c5d03ce8853..e9d59132d0e 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -22,8 +22,8 @@ limitations under the License. */ #endif #include "paddle/fluid/distributed/collective/ProcessGroup.h" -#include "paddle/fluid/distributed/collective/ProcessGroupStream.h" #include "paddle/fluid/distributed/collective/Types.h" +#include "paddle/fluid/distributed/collective/process_group_stream.h" #include "paddle/fluid/distributed/collective/reducer.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" @@ -34,7 +34,7 @@ limitations under the License. 
*/ #include "paddle/phi/api/all.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" +#include "paddle/fluid/distributed/collective/process_group_nccl.h" #endif #if defined(PADDLE_WITH_MPI) @@ -169,9 +169,7 @@ void BindDistributed(py::module *m) { auto p_dense = std::dynamic_pointer_cast(tensor.impl()); auto out_dense = *p_dense; - // numel == -1 indicates sending the whole tensor - return self.Send( - out_dense, dst, /*offset*/ 0, /*numel*/ -1, sync_op); + return self.Send(out_dense, dst, sync_op); }, py::arg("tensor"), py::arg("dst"), @@ -215,9 +213,7 @@ void BindDistributed(py::module *m) { auto p_dense = std::dynamic_pointer_cast(tensor.impl()); auto *in_dense = p_dense.get(); - // numel == -1 indicates receiving the whole tensor - return self.Recv( - in_dense, src, /*offset*/ 0, /*numel*/ -1, sync_op); + return self.Recv(in_dense, src, sync_op); }, py::arg("tensor"), py::arg("src"), @@ -270,11 +266,7 @@ void BindDistributed(py::module *m) { auto in_dense = *p_in_tensor; auto *dev_ctx = self.GetDeviceContext(in_tensor.place()); - auto task = self.AllGather(out_dense, - in_dense, - /*offset*/ 0, - /*numel*/ -1, - sync_op); + auto task = self.AllGather(out_dense, in_dense, sync_op); SplitTensor(*dev_ctx, *out_dense, &out_tensor_list); task->UpdateWaitChain(*dev_ctx); return task; @@ -300,11 +292,7 @@ void BindDistributed(py::module *m) { in_tensor.impl()); auto in_dense = *p_in_tensor; - return self.AllGather(out_dense, - in_dense, - /*offset*/ 0, - /*numel*/ -1, - sync_op); + return self.AllGather(out_dense, in_dense, sync_op); }, py::arg("out"), py::arg("in"), @@ -771,8 +759,6 @@ void BindDistributed(py::module *m) { auto *dev_ctx = self.GetDeviceContext(in_tensor.place(), true); auto task = self.AllGather(out_dense, in_dense, - /*offset*/ 0, - /*numel*/ -1, /*sync_op*/ true, /*use_calc_stream*/ true); SplitTensor(*dev_ctx, *out_dense, &out_tensor_list); @@ -799,8 +785,6 @@ void BindDistributed(py::module *m) { return self.AllGather(out_dense, in_dense, - /*offset*/ 0, - /*numel*/ -1, /*sync_op*/ true, /*use_calc_stream*/ true); }, @@ -1127,11 +1111,8 @@ void BindDistributed(py::module *m) { auto p_dense = std::dynamic_pointer_cast(tensor.impl()); auto out_dense = *p_dense; - // numel == -1 indicates sending the whole tensor return self.Send(out_dense, dst, - /*offset*/ 0, - /*numel*/ -1, /*sync_op*/ true, /*use_calc_stream*/ true); }, @@ -1177,11 +1158,8 @@ void BindDistributed(py::module *m) { auto p_dense = std::dynamic_pointer_cast(tensor.impl()); auto *in_dense = p_dense.get(); - // numel == -1 indicates receiving the whole tensor return self.Recv(in_dense, src, - /*offset*/ 0, - /*numel*/ -1, /*sync_op*/ true, /*use_calc_stream*/ true); }, diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index c35bd2bc456..c9e110ae7b8 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -67,7 +67,7 @@ endif() set(COMM_UTILS_DEPS processgroup) if(WITH_NCCL OR WITH_RCCL) - set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_nccl) + set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} process_group_nccl) endif() if(WITH_CUSTOM_DEVICE) set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_custom) diff --git a/paddle/phi/backends/processgroup_comm_utils.cc b/paddle/phi/backends/processgroup_comm_utils.cc index 841b88d752e..450c1763871 100644 --- a/paddle/phi/backends/processgroup_comm_utils.cc +++ 
+++ b/paddle/phi/backends/processgroup_comm_utils.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #include "paddle/phi/backends/c_comm_lib.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #if defined(PADDLE_WITH_CUSTOM_DEVICE)
 #include "paddle/fluid/distributed/collective/ProcessGroupCustom.h"
diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt
index 808b18bb02d..abe35f284d6 100644
--- a/paddle/phi/kernels/CMakeLists.txt
+++ b/paddle/phi/kernels/CMakeLists.txt
@@ -80,7 +80,7 @@ set(COMMON_KERNEL_DEPS
 
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup)
 if(WITH_NCCL OR WITH_RCCL)
-  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_nccl)
+  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group_nccl)
 endif()
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_comm_utils)
 if(WITH_CUDNN_FRONTEND)
diff --git a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h
index cfb2758e62d..f99da25dec9 100644
--- a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h
+++ b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h
@@ -28,7 +28,7 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
-- 
GitLab
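
Note: the AllGather/Send/Recv overloads this patch adds to ProcessGroup.h and
process_group_stream.* all follow the same forwarding-overload pattern: a short
convenience signature supplies offset = 0 and numel = -1 ("the whole tensor")
and delegates to the fully parameterized virtual method, which is what lets the
pybind call sites above shrink to self.Send(out_dense, dst, sync_op). Below is a
minimal, self-contained C++ sketch of that pattern; Group, Tensor, and Task are
hypothetical stand-ins for illustration, not Paddle's ProcessGroup,
phi::DenseTensor, or ProcessGroup::Task.

    #include <cstdint>
    #include <iostream>
    #include <memory>

    struct Tensor {
      int64_t size = 8;  // element count of the toy tensor
    };

    struct Task {};  // stand-in for an async communication handle

    class Group {
     public:
      virtual ~Group() = default;

      // Convenience overload: operate on the whole tensor.
      std::shared_ptr<Task> Send(const Tensor& t, int dst_rank, bool sync_op) {
        return Send(t, dst_rank, /*offset*/ 0, /*numel*/ -1, sync_op);
      }

      // Fully parameterized version; numel == -1 means "the whole tensor".
      virtual std::shared_ptr<Task> Send(const Tensor& t,
                                         int dst_rank,
                                         int64_t offset,
                                         int64_t numel,
                                         bool sync_op) {
        const int64_t n = (numel == -1) ? t.size - offset : numel;
        std::cout << "send " << n << " elements to rank " << dst_rank
                  << (sync_op ? " (sync)" : " (async)") << "\n";
        return std::make_shared<Task>();
      }
    };

    int main() {
      Group g;
      Tensor t;
      g.Send(t, /*dst_rank*/ 1, /*sync_op*/ true);  // forwards offset 0, numel -1
    }

Keeping the -1 sentinel and the forwarding in one place means backends only
override the fully parameterized method, while callers that want the common
whole-tensor case stay terse.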