From 2fa91d71b4648ca3a111e3eebe421712db06db36 Mon Sep 17 00:00:00 2001 From: yu wentao Date: Wed, 22 Feb 2023 16:59:29 +0800 Subject: [PATCH] * remove broadcast (#50701) --- .../operators/collective/broadcast_op.cc | 84 -------------- .../operators/collective/broadcast_op.cu.cc | 90 --------------- .../operators/collective/broadcast_op_xpu.cc | 105 ------------------ 3 files changed, 279 deletions(-) delete mode 100644 paddle/fluid/operators/collective/broadcast_op.cc delete mode 100644 paddle/fluid/operators/collective/broadcast_op.cu.cc delete mode 100644 paddle/fluid/operators/collective/broadcast_op_xpu.cc diff --git a/paddle/fluid/operators/collective/broadcast_op.cc b/paddle/fluid/operators/collective/broadcast_op.cc deleted file mode 100644 index 787acb066a5..00000000000 --- a/paddle/fluid/operators/collective/broadcast_op.cc +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class BroadcastOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::InvalidArgument( - "Input(X) of BroadcastOp should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::InvalidArgument( - "Output(Output) of ConvOp should not be null.")); - } -}; - -class BroadcastOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() { - AddInput("X", "(Tensor), tensor to be broadcast."); - AddOutput("Out", "(Tensor) the result of broadcast."); - AddAttr( - "sync_mode", - "(bool) whether to synchronize the CUDA stream after nccl call.") - .SetDefault(false); - AddAttr("root", "(int).").SetDefault(0).EqualGreaterThan(0); - AddComment(R"DOC( -***Broadcast Operator*** - -Call NCCL Broadcast internally. Note that this op must be used when one -thread is managing one GPU device. -)DOC"); - } -}; - -template -class BroadcastOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "Broadcast op can run on gpu place only for now.")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_WITHOUT_GRADIENT(broadcast, - ops::BroadcastOp, - ops::BroadcastOpMaker); - -REGISTER_OP_CPU_KERNEL(broadcast, - ops::BroadcastOpKernel, - ops::BroadcastOpKernel, - ops::BroadcastOpKernel, - ops::BroadcastOpKernel, - ops::BroadcastOpKernel); diff --git a/paddle/fluid/operators/collective/broadcast_op.cu.cc b/paddle/fluid/operators/collective/broadcast_op.cu.cc deleted file mode 100644 index 9d1fedc1690..00000000000 --- a/paddle/fluid/operators/collective/broadcast_op.cu.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" - -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/platform/device/gpu/nccl_helper.h" -#endif -#include "paddle/fluid/framework/convert_utils.h" - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -namespace paddle { -namespace operators { - -template -class NCCLBroadcastOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet( - "The place of ExecutionContext should be CUDAPlace.")); - -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - int dev_id = ctx.GetPlace().device; - int root_dev_id = ctx.Attr("root"); - - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); - PADDLE_ENFORCE_EQ( - out->IsInitialized(), - true, - platform::errors::PreconditionNotMet( - "Currently, the output of broadcast op must be initialized," - "because this op can only be an In-Place operation.")); - void* send_recv_buffer = out->mutable_data(ctx.GetPlace()); - PADDLE_ENFORCE_EQ( - send_recv_buffer, - in->data(), - platform::errors::PreconditionNotMet("Currently, the broadcast op can " - "only be an In-Place operation.")); - - auto& dev_ctx = ctx.template device_context(); - auto comm = dev_ctx.nccl_comm(); - auto stream = dev_ctx.stream(); - - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( - send_recv_buffer, - static_cast(in->numel()), - platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())), - root_dev_id, - comm, - stream)); - - VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")" - << " From " << root_dev_id << " to " << dev_id; - - if (ctx.Attr("sync_mode")) { - platform::GpuStreamSync(stream); - } -#else - PADDLE_THROW(platform::errors::PreconditionNotMet( - "PaddlePaddle should compile with GPU.")); -#endif - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_CUDA_KERNEL(broadcast, - ops::NCCLBroadcastOpKernel, - ops::NCCLBroadcastOpKernel, - ops::NCCLBroadcastOpKernel, - ops::NCCLBroadcastOpKernel, - ops::NCCLBroadcastOpKernel); diff --git a/paddle/fluid/operators/collective/broadcast_op_xpu.cc b/paddle/fluid/operators/collective/broadcast_op_xpu.cc deleted file mode 100644 index 54eccbead94..00000000000 --- a/paddle/fluid/operators/collective/broadcast_op_xpu.cc +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" - -#if defined(PADDLE_WITH_XPU_BKCL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/xpu/bkcl_helper.h" -#endif - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -namespace paddle { -namespace operators { - -template -class BKCLBroadcastOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet( - "The place of ExecutionContext should be XPUPlace.")); - -#if defined(PADDLE_WITH_XPU_BKCL) - int dev_id = ctx.GetPlace().device; - int root_dev_id = ctx.Attr("root"); - - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); - PADDLE_ENFORCE_EQ( - out->IsInitialized(), - true, - platform::errors::PreconditionNotMet( - "Currently, the output of broadcast op must be initialized," - "because this op can only be an In-Place operation.")); - void* send_recv_buffer = out->mutable_data(ctx.GetPlace()); - PADDLE_ENFORCE_EQ( - send_recv_buffer, - in->data(), - platform::errors::PreconditionNotMet("Currently, the broadcast op can " - "only be an In-Place operation.")); - - auto& dev_ctx = ctx.template device_context(); - auto comm = dev_ctx.bkcl_context(); - auto stream = dev_ctx.x_context()->xpu_stream; - - // TODO(wangxi16): bkcl_broadcast only support float type, - // need to converted other type to float before broadcasting. - // Broadcast is equivalent to no type of operation, does not affect - // correctness. - // Once bkcl_broadcast support other type, need chang to: - // BKCLDataType data_type = platform::ToBKCLDataType(in->type()); - BKCLDataType data_type = BKCL_FLOAT; - size_t scale = sizeof(T) / sizeof(float); - auto ret = bkcl_broadcast(comm, - send_recv_buffer, - send_recv_buffer, - static_cast(in->numel()) * scale, - data_type, - root_dev_id, - stream); - PADDLE_ENFORCE_EQ(ret, - BKCL_SUCCESS, - platform::errors::Unavailable("bkcl_broadcast failed")); - - VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")" - << " From " << root_dev_id << " to " << dev_id; - - if (ctx.Attr("sync_mode")) { - dev_ctx.Wait(); - } -#else - PADDLE_THROW(platform::errors::PreconditionNotMet( - "PaddlePaddle should compile with XPU.")); -#endif - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_XPU_KERNEL(broadcast, - ops::BKCLBroadcastOpKernel, - ops::BKCLBroadcastOpKernel, - ops::BKCLBroadcastOpKernel, - ops::BKCLBroadcastOpKernel); -- GitLab