Unverified commit 40f3f4f0, authored by Wen Sun, committed by GitHub

refactor: rename files (#49117)

Parent e77d1cac
@@ -3,13 +3,13 @@ cc_library(
   SRCS ProcessGroup.cc
   DEPS dense_tensor)
 cc_library(
-  processgroup_stream
-  SRCS ProcessGroupStream.cc
+  process_group_stream
+  SRCS process_group_stream.cc
   DEPS dense_tensor)
 cc_library(
   eager_reducer
   SRCS reducer.cc
-  DEPS eager_api processgroup processgroup_stream phi_api string_helper)
+  DEPS eager_api processgroup process_group_stream phi_api string_helper)
 
 if(WITH_DISTRIBUTE)
   cc_library(
@@ -20,10 +20,10 @@ endif()
 
 if(WITH_NCCL OR WITH_RCCL)
   cc_library(
-    processgroup_nccl
-    SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc check.cc
+    process_group_nccl
+    SRCS process_group_nccl.cc nccl_tools.cc Common.cc check.cc
     DEPS processgroup
-         processgroup_stream
+         process_group_stream
          place
          enforce
          collective_helper
......
@@ -97,6 +97,17 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+      phi::DenseTensor* out_tensor,
+      const phi::DenseTensor& in_tensor,
+      bool sync_op) {
+    return AllGather(out_tensor,
+                     in_tensor,
+                     /*offset*/ 0,
+                     /*numel*/ -1,  // -1 indicates the whole tensor
+                     sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
       phi::DenseTensor* out_tensor,
       const phi::DenseTensor& in_tensor,
@@ -175,6 +186,16 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+                                                   int src_rank,
+                                                   bool sync_op) {
+    return Recv(tensor,
+                src_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
                                                    int src_rank,
                                                    int64_t offset,
@@ -185,6 +206,15 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Send(
+      const phi::DenseTensor& tensor, int dst_rank, bool sync_op) {
+    return Send(tensor,
+                dst_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Send(
       const phi::DenseTensor& tensor,
       int dst_rank,
......
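For context, a minimal sketch of how the new default-argument overloads above are meant to be called; this is not part of the commit, and the helper name, `pg`, `out`, and `in` are hypothetical. The short form simply forwards to the existing full-parameter overload with offset 0 and numel -1 (the whole tensor):

// Sketch only (not part of this commit): pg, out, and in are hypothetical names.
#include <memory>

#include "paddle/fluid/distributed/collective/ProcessGroup.h"

void AllGatherWholeTensor(paddle::distributed::ProcessGroup* pg,
                          phi::DenseTensor* out,
                          const phi::DenseTensor& in) {
  // Previously the whole-tensor case still had to spell out offset/numel:
  //   pg->AllGather(out, in, /*offset*/ 0, /*numel*/ -1, /*sync_op*/ true);
  // With the new overload only sync_op is needed; the defaults are forwarded.
  auto task = pg->AllGather(out, in, /*sync_op*/ true);
  task->Wait();
}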
@@ -20,7 +20,7 @@
 #include <string>
 #include <unordered_map>
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/store/store.h"
 #include "paddle/fluid/platform/device/xpu/xpu_header.h"
 #include "paddle/fluid/platform/enforce.h"
......
@@ -14,7 +14,7 @@
 #include "paddle/fluid/distributed/collective/check.h"
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/dense_tensor.h"
......
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 
 #include "paddle/fluid/platform/enforce.h"
......
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 
 #include "paddle/fluid/distributed/collective/Common.h"
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
 #include "paddle/fluid/distributed/collective/check.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #include "paddle/fluid/distributed/collective/utils.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #include "paddle/fluid/platform/place.h"
......
@@ -20,7 +20,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/store/store.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/device_event.h"
@@ -29,7 +29,7 @@
 #include "paddle/phi/core/device_context.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #endif
 
 #ifdef PADDLE_WITH_RCCL
......
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 
 namespace paddle {
 namespace distributed {
@@ -40,6 +40,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
                    /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
+    phi::DenseTensor* out_tensor,
+    const phi::DenseTensor& in_tensor,
+    bool sync_op,
+    bool use_calc_stream) {
+  return AllGather(out_tensor,
+                   in_tensor,
+                   /*offset*/ 0,
+                   /*numel*/ -1,  // -1 indicates the whole tensor
+                   sync_op,
+                   use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
     phi::DenseTensor* out_tensor,
     const phi::DenseTensor& in_tensor,
@@ -200,6 +213,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
              /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
+    phi::DenseTensor* tensor,
+    int src_rank,
+    bool sync_op,
+    bool use_calc_stream) {
+  return Recv(tensor,
+              src_rank,
+              /*offset*/ 0,
+              /*numel*/ -1,  // -1 indicates the whole tensor
+              sync_op,
+              use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
     phi::DenseTensor* tensor,
     int src_rank,
@@ -225,6 +251,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
              /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
+    const phi::DenseTensor& tensor,
+    int dst_rank,
+    bool sync_op,
+    bool use_calc_stream) {
+  return Send(tensor,
+              dst_rank,
+              /*offset*/ 0,
+              /*numel*/ -1,  // -1 indicates the whole tensor
+              sync_op,
+              use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
     const phi::DenseTensor& tensor,
     int dst_rank,
......
@@ -69,6 +69,12 @@ class ProcessGroupStream : public ProcessGroup {
       int64_t numel,
       bool sync_op) override;
 
+  virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+      phi::DenseTensor* out_tensor,
+      const phi::DenseTensor& in_tensor,
+      bool sync_op,
+      bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
       phi::DenseTensor* out_tensor,
       const phi::DenseTensor& in_tensor,
@@ -161,6 +167,11 @@ class ProcessGroupStream : public ProcessGroup {
                                                   int64_t numel,
                                                   bool sync_op) override;
 
+  virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+                                                   int src_rank,
+                                                   bool sync_op,
+                                                   bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
                                                    int src_rank,
                                                    int64_t offset,
@@ -174,6 +185,11 @@ class ProcessGroupStream : public ProcessGroup {
       int64_t numel,
       bool sync_op) override;
 
+  std::shared_ptr<ProcessGroup::Task> Send(const phi::DenseTensor& tensor,
+                                           int dst_rank,
+                                           bool sync_op,
+                                           bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> Send(
       const phi::DenseTensor& tensor,
       int dst_rank,
......
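A companion sketch for the stream-aware declarations above, again with hypothetical names (`pg`, `tensor`, `src_rank`) that are not part of this commit: the four-argument shorthand keeps the whole-tensor defaults while letting the caller choose the calculation stream.

// Sketch only (not part of this commit): pg, tensor, and src_rank are hypothetical.
#include "paddle/fluid/distributed/collective/process_group_stream.h"

void RecvWholeTensorOnCalcStream(paddle::distributed::ProcessGroupStream* pg,
                                 phi::DenseTensor* tensor,
                                 int src_rank) {
  // offset 0 and numel -1 are filled in by the overload; use_calc_stream=true
  // asks the group to run the receive on the calculation stream rather than
  // the separate communication stream.
  pg->Recv(tensor, src_rank, /*sync_op*/ true, /*use_calc_stream*/ true);
}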
@@ -30,7 +30,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #endif
......
@@ -23,7 +23,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #endif
......
@@ -157,7 +157,7 @@ endif()
 if(WITH_PYTHON)
   set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
   if(WITH_NCCL OR WITH_RCCL)
-    set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
+    set(PYBIND_DEPS ${PYBIND_DEPS} process_group_nccl)
   endif()
   if(WITH_XPU_BKCL)
     set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_bkcl)
......
@@ -22,8 +22,8 @@ limitations under the License. */
 #endif
 
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
 #include "paddle/fluid/distributed/collective/Types.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/collective/reducer.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor.h"
@@ -34,7 +34,7 @@ limitations under the License. */
 #include "paddle/phi/api/all.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 
 #if defined(PADDLE_WITH_MPI)
@@ -169,9 +169,7 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto out_dense = *p_dense;
-              // numel == -1 indicates sending the whole tensor
-              return self.Send(
-                  out_dense, dst, /*offset*/ 0, /*numel*/ -1, sync_op);
+              return self.Send(out_dense, dst, sync_op);
             },
             py::arg("tensor"),
             py::arg("dst"),
@@ -215,9 +213,7 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto *in_dense = p_dense.get();
-              // numel == -1 indicates receiving the whole tensor
-              return self.Recv(
-                  in_dense, src, /*offset*/ 0, /*numel*/ -1, sync_op);
+              return self.Recv(in_dense, src, sync_op);
             },
             py::arg("tensor"),
             py::arg("src"),
@@ -270,11 +266,7 @@ void BindDistributed(py::module *m) {
               auto in_dense = *p_in_tensor;
 
               auto *dev_ctx = self.GetDeviceContext(in_tensor.place());
-              auto task = self.AllGather(out_dense,
-                                         in_dense,
-                                         /*offset*/ 0,
-                                         /*numel*/ -1,
-                                         sync_op);
+              auto task = self.AllGather(out_dense, in_dense, sync_op);
               SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
               task->UpdateWaitChain(*dev_ctx);
               return task;
@@ -300,11 +292,7 @@ void BindDistributed(py::module *m) {
                       in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
-              return self.AllGather(out_dense,
-                                    in_dense,
-                                    /*offset*/ 0,
-                                    /*numel*/ -1,
-                                    sync_op);
+              return self.AllGather(out_dense, in_dense, sync_op);
             },
             py::arg("out"),
             py::arg("in"),
@@ -771,8 +759,6 @@ void BindDistributed(py::module *m) {
               auto *dev_ctx = self.GetDeviceContext(in_tensor.place(), true);
               auto task = self.AllGather(out_dense,
                                          in_dense,
-                                         /*offset*/ 0,
-                                         /*numel*/ -1,
                                          /*sync_op*/ true,
                                          /*use_calc_stream*/ true);
               SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
@@ -799,8 +785,6 @@ void BindDistributed(py::module *m) {
               return self.AllGather(out_dense,
                                     in_dense,
-                                    /*offset*/ 0,
-                                    /*numel*/ -1,
                                     /*sync_op*/ true,
                                     /*use_calc_stream*/ true);
             },
@@ -1127,11 +1111,8 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto out_dense = *p_dense;
-              // numel == -1 indicates sending the whole tensor
               return self.Send(out_dense,
                                dst,
-                               /*offset*/ 0,
-                               /*numel*/ -1,
                                /*sync_op*/ true,
                                /*use_calc_stream*/ true);
             },
@@ -1177,11 +1158,8 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto *in_dense = p_dense.get();
-              // numel == -1 indicates receiving the whole tensor
               return self.Recv(in_dense,
                                src,
-                               /*offset*/ 0,
-                               /*numel*/ -1,
                                /*sync_op*/ true,
                                /*use_calc_stream*/ true);
             },
......
@@ -67,7 +67,7 @@ endif()
 set(COMM_UTILS_DEPS processgroup)
 if(WITH_NCCL OR WITH_RCCL)
-  set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_nccl)
+  set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} process_group_nccl)
 endif()
 if(WITH_CUSTOM_DEVICE)
   set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_custom)
......
@@ -15,7 +15,7 @@
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #include "paddle/phi/backends/c_comm_lib.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #if defined(PADDLE_WITH_CUSTOM_DEVICE)
 #include "paddle/fluid/distributed/collective/ProcessGroupCustom.h"
......
@@ -80,7 +80,7 @@ set(COMMON_KERNEL_DEPS
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup)
 if(WITH_NCCL OR WITH_RCCL)
-  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_nccl)
+  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group_nccl)
 endif()
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_comm_utils)
 if(WITH_CUDNN_FRONTEND)
......
@@ -28,7 +28,7 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
......