* remove broadcast (#50701)

2fa91d71 · TaoTao Li · GitHub · 629f20e2 · 629f20e2 · 629f20e2
3 changed files
--- a/paddle/fluid/operators/collective/broadcast_op.cc
+++ b/paddle/fluid/operators/collective/broadcast_op.cc
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <algorithm>
-#include <ostream>
-#include <utility>
-#include <vector>
-#include "paddle/fluid/framework/op_registry.h"
-namespace paddle {
-namespace operators {
-// Operator definition for `broadcast`. Shape inference only verifies that the
-// "X" input and the "Out" output are present; it sets no output dimensions.
-class BroadcastOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-  // Raises InvalidArgument when Input(X) or Output(Out) is missing.
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Input(X) of BroadcastOp should not be null."));
-    // The message names the slot that is actually checked ("Out") and this
-    // operator, so a failure here points at the right op.
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Output(Out) of BroadcastOp should not be null."));
-  }
-};
-// Declares the proto of the `broadcast` op: one input "X", one output "Out",
-// a boolean "sync_mode" attribute (default false) and an integer "root"
-// attribute (default 0, constrained to be >= 0).
-class BroadcastOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  // Marked `override` so the compiler verifies that this really implements the
-  // base-class hook instead of silently declaring an unrelated function.
-  void Make() override {
-    AddInput("X", "(Tensor), tensor to be broadcast.");
-    AddOutput("Out", "(Tensor) the result of broadcast.");
-    AddAttr<bool>(
-        "sync_mode",
-        "(bool) whether to synchronize the CUDA stream after nccl call.")
-        .SetDefault(false);
-    AddAttr<int>("root", "(int).").SetDefault(0).EqualGreaterThan(0);
-    AddComment(R"DOC(
-***Broadcast Operator***
-Call NCCL Broadcast internally. Note that this op must be used when one
-thread is managing one GPU device.
-)DOC");
-  }
-};
-// CPU placeholder kernel for `broadcast`. It performs no computation: any
-// invocation fails with PreconditionNotMet.
-template <typename DataT>
-class BroadcastOpKernel : public framework::OpKernel<DataT> {
- public:
-  // The execution context is intentionally unused; the kernel always throws.
-  void Compute(const framework::ExecutionContext& /*ctx*/) const override {
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "Broadcast op can run on gpu place only for now."));
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;  // alias for the operator classes named below
-namespace plat = paddle::platform;  // alias used for plat::float16
-REGISTER_OP_WITHOUT_GRADIENT(broadcast,  // op type `broadcast`, registered with no gradient op
-                             ops::BroadcastOp,
-                             ops::BroadcastOpMaker);
-REGISTER_OP_CPU_KERNEL(broadcast,  // CPU kernels; BroadcastOpKernel::Compute always throws
-                       ops::BroadcastOpKernel<float>,
-                       ops::BroadcastOpKernel<double>,
-                       ops::BroadcastOpKernel<int>,
-                       ops::BroadcastOpKernel<int64_t>,
-                       ops::BroadcastOpKernel<plat::float16>);
--- a/paddle/fluid/operators/collective/broadcast_op.cu.cc
+++ b/paddle/fluid/operators/collective/broadcast_op.cu.cc
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/framework/op_registry.h"
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
-#endif
-#include "paddle/fluid/framework/convert_utils.h"
-namespace ops = paddle::operators;  // alias used by the kernel registration at the end of this file
-namespace plat = paddle::platform;  // alias used for plat::float16 in that registration
-namespace paddle {
-namespace operators {
-// GPU kernel for the `broadcast` op, implemented with ncclBcast.
-// The op is in-place only: "Out" must already be initialized and its buffer
-// must be the same pointer as the data of "X"; both are enforced before the
-// collective call is issued.
-template <typename T>
-class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
- public:
-  // Issues ncclBcast on the "X"/"Out" buffer using the `root` attribute and the
-  // communicator and stream of the GPU device context. When `sync_mode` is
-  // true, the stream is synchronized before returning.
-  // Raises PreconditionNotMet when the place is not a GPU place, when "Out" is
-  // uninitialized or does not alias "X", or when the build has neither NCCL
-  // nor RCCL enabled.
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        platform::is_gpu_place(ctx.GetPlace()),
-        true,
-        platform::errors::PreconditionNotMet(
-            "The place of ExecutionContext should be CUDAPlace."));
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-    int dev_id = ctx.GetPlace().device;
-    int root_dev_id = ctx.Attr<int>("root");
-    auto in = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-    // The two adjacent literals are concatenated by the compiler, so the first
-    // one ends with a space to keep the sentence readable.
-    PADDLE_ENFORCE_EQ(
-        out->IsInitialized(),
-        true,
-        platform::errors::PreconditionNotMet(
-            "Currently, the output of broadcast op must be initialized, "
-            "because this op can only be an In-Place operation."));
-    void* send_recv_buffer = out->mutable_data<T>(ctx.GetPlace());
-    // In-place check: the output buffer must be the input buffer.
-    PADDLE_ENFORCE_EQ(
-        send_recv_buffer,
-        in->data(),
-        platform::errors::PreconditionNotMet("Currently, the broadcast op can "
-                                             "only be an In-Place operation."));
-    auto& dev_ctx = ctx.template device_context<phi::GPUContext>();
-    auto comm = dev_ctx.nccl_comm();
-    auto stream = dev_ctx.stream();
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
-        send_recv_buffer,
-        static_cast<size_t>(in->numel()),
-        platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())),
-        root_dev_id,
-        comm,
-        stream));
-    VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")"
-            << " From " << root_dev_id << " to " << dev_id;
-    // The call above is only enqueued on `stream`; block here if requested.
-    if (ctx.Attr<bool>("sync_mode")) {
-      platform::GpuStreamSync(stream);
-    }
-#else
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with GPU."));
-#endif
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-REGISTER_OP_CUDA_KERNEL(broadcast,  // CUDA kernels of op `broadcast`, one per listed element type
-                        ops::NCCLBroadcastOpKernel<float>,
-                        ops::NCCLBroadcastOpKernel<double>,
-                        ops::NCCLBroadcastOpKernel<int>,
-                        ops::NCCLBroadcastOpKernel<int64_t>,
-                        ops::NCCLBroadcastOpKernel<plat::float16>);
--- a/paddle/fluid/operators/collective/broadcast_op_xpu.cc
+++ b/paddle/fluid/operators/collective/broadcast_op_xpu.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <algorithm>
-#include <utility>
-#include <vector>
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/op_registry.h"
-#if defined(PADDLE_WITH_XPU_BKCL)
-#include "paddle/fluid/platform/collective_helper.h"
-#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
-#endif
-namespace ops = paddle::operators;  // alias used by the kernel registration at the end of this file
-namespace plat = paddle::platform;  // alias for paddle::platform
-namespace paddle {
-namespace operators {
-// XPU kernel for the `broadcast` op, implemented with bkcl_broadcast.
-// The op is in-place only: "Out" must already be initialized and its buffer
-// must be the same pointer as the data of "X"; both are enforced before the
-// collective call is issued.
-template <typename T>
-class BKCLBroadcastOpKernel : public framework::OpKernel<T> {
- public:
-  // Issues bkcl_broadcast on the "X"/"Out" buffer using the `root` attribute
-  // and the BKCL context and stream of the XPU device context. When
-  // `sync_mode` is true, waits on the device context before returning.
-  // Raises PreconditionNotMet when the place is not an XPU place, when "Out"
-  // is uninitialized or does not alias "X", or when the build lacks BKCL;
-  // raises Unavailable when bkcl_broadcast does not return BKCL_SUCCESS.
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()),
-                      true,
-                      platform::errors::PreconditionNotMet(
-                          "The place of ExecutionContext should be XPUPlace."));
-#if defined(PADDLE_WITH_XPU_BKCL)
-    int dev_id = ctx.GetPlace().device;
-    int root_dev_id = ctx.Attr<int>("root");
-    auto in = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-    // The two adjacent literals are concatenated by the compiler, so the first
-    // one ends with a space to keep the sentence readable.
-    PADDLE_ENFORCE_EQ(
-        out->IsInitialized(),
-        true,
-        platform::errors::PreconditionNotMet(
-            "Currently, the output of broadcast op must be initialized, "
-            "because this op can only be an In-Place operation."));
-    void* send_recv_buffer = out->mutable_data<T>(ctx.GetPlace());
-    // In-place check: the output buffer must be the input buffer.
-    PADDLE_ENFORCE_EQ(
-        send_recv_buffer,
-        in->data(),
-        platform::errors::PreconditionNotMet("Currently, the broadcast op can "
-                                             "only be an In-Place operation."));
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    auto comm = dev_ctx.bkcl_context();
-    auto stream = dev_ctx.x_context()->xpu_stream;
-    // TODO(wangxi16): bkcl_broadcast only supports the float type, so other
-    // types are broadcast as a run of floats covering the same bytes.
-    // Broadcast does not interpret the values, so this does not affect
-    // correctness.
-    // Once bkcl_broadcast supports other types, this needs to change to:
-    // BKCLDataType data_type = platform::ToBKCLDataType(in->type());
-    BKCLDataType data_type = BKCL_FLOAT;
-    // The float-count conversion below truncates; an element type narrower
-    // than float would yield scale == 0 and silently broadcast nothing.
-    static_assert(sizeof(T) % sizeof(float) == 0,
-                  "BKCLBroadcastOpKernel requires sizeof(T) to be a multiple "
-                  "of sizeof(float)");
-    size_t scale = sizeof(T) / sizeof(float);
-    auto ret = bkcl_broadcast(comm,
-                              send_recv_buffer,
-                              send_recv_buffer,
-                              static_cast<size_t>(in->numel()) * scale,
-                              data_type,
-                              root_dev_id,
-                              stream);
-    PADDLE_ENFORCE_EQ(ret,
-                      BKCL_SUCCESS,
-                      platform::errors::Unavailable("bkcl_broadcast failed"));
-    VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")"
-            << " From " << root_dev_id << " to " << dev_id;
-    if (ctx.Attr<bool>("sync_mode")) {
-      dev_ctx.Wait();
-    }
-#else
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with XPU."));
-#endif
-  }
-};
-}  // namespace operators
-}  // namespace paddle
-REGISTER_OP_XPU_KERNEL(broadcast,  // XPU kernels of op `broadcast`, one per listed element type
-                       ops::BKCLBroadcastOpKernel<float>,
-                       ops::BKCLBroadcastOpKernel<double>,
-                       ops::BKCLBroadcastOpKernel<int>,
-                       ops::BKCLBroadcastOpKernel<int64_t>);