From 846c7e707620c5aa4ac1810d5301abbe31e341cd Mon Sep 17 00:00:00 2001
From: Nyakku Shigure
Date: Tue, 20 Sep 2022 14:48:53 +0800
Subject: [PATCH] [CodeStyle] remove crlf for cpp files (#46156)

---
 .../fluid/inference/analysis/analysis_pass.h  | 106 ++---
 .../collective/c_allgather_op_npu_test.cc     | 382 ++++++++---------
 .../collective/c_allreduce_max_op_npu_test.cc | 374 ++++++++---------
 .../collective/c_allreduce_sum_op_npu_test.cc | 396 +++++++++---------
 .../collective/c_broadcast_op_npu_test.cc     | 360 ++++++++--------
 .../collective/c_reduce_sum_op_npu_test.cc    | 382 ++++++++---------
 .../collective/c_reducescatter_op_npu_test.cc | 376 ++++++++---------
 .../mkldnn/fill_constant_mkldnn_op.cc         | 280 ++++++-------
 paddle/fluid/operators/unbind_op.cc           | 180 ++++----
 paddle/fluid/operators/unbind_op.h            | 108 ++---
 10 files changed, 1472 insertions(+), 1472 deletions(-)

diff --git a/paddle/fluid/inference/analysis/analysis_pass.h b/paddle/fluid/inference/analysis/analysis_pass.h
index a95498d82d0..463077fad8b 100644
--- a/paddle/fluid/inference/analysis/analysis_pass.h
+++ b/paddle/fluid/inference/analysis/analysis_pass.h
@@ -1,53 +1,53 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/ + +#pragma once + +#include + +#include +#include + +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/inference/analysis/argument.h" +#include "paddle/fluid/inference/analysis/helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * AnalysisPass is a pass used to control the IR passes. + */ +class AnalysisPass { + public: + AnalysisPass() = default; + virtual ~AnalysisPass() = default; + + // Run on a single Graph. + void Run(Argument* argument) { RunImpl(argument); } + + // Human-readable short representation. + virtual std::string repr() const = 0; + // Human-readable long description. + virtual std::string description() const { return "No DOC"; } + + protected: + // User should implement these. + virtual void RunImpl(Argument* argument) = 0; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc index b6dcb41d3b0..f2530ac1dda 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc @@ -1,191 +1,191 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_allgather_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_reducescatter_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_allgather); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_allgather, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - for (auto ele : data) { - debugstring += std::to_string(ele) + std::string(","); - } - VLOG(2) << preStr << ":" << std::endl << debugstring; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - - std::vector init; - int rank_id = atoi(getenv("RANK_ID")); - - int num1 = 1; - int num2 = 4; - - for (int64_t i = 0; i < num1 * num2; ++i) { - init.push_back(1.0 + rank_id); - } - PrintDebugInfo("input data", init); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num1, num2}); - ctx.Wait(); - - auto place = ctx.GetPlace(); - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num1, num2}); - tensor_out->mutable_data(place); // allocate - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx"); - attrs["ring_id"] = 0; - attrs["nranks"] = 2; - - auto op = f::OpRegistry::CreateOp( - "c_allgather", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - - for (int i = 0; i < 10; i++) { - op->Run(*scope, place); - } - ctx.Wait(); - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - - EXPECT_EQ(out_vec.size(), init.size() * 2); - for (uint32_t i = 0; i < out_vec.size() / 2; i++) { - EXPECT_EQ(out_vec[i], 1.0); - } - for (uint32_t i = out_vec.size() / 2; i < out_vec.size(); i++) { - EXPECT_EQ(out_vec[i], 2.0); - } -} - -TEST(c_allgather, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - TestHCCLAllGatherOp(&scope, ctx); -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" +#include "paddle/fluid/operators/collective/c_reducescatter_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_allgather); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_allgather, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + for (auto ele : data) { + debugstring += std::to_string(ele) + std::string(","); + } + VLOG(2) << preStr << ":" << std::endl << debugstring; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + + std::vector init; + int rank_id = atoi(getenv("RANK_ID")); + + int num1 = 1; + int num2 = 4; + + for (int64_t i = 0; i < num1 * num2; ++i) { + init.push_back(1.0 + rank_id); + } + PrintDebugInfo("input data", init); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num1, num2}); + ctx.Wait(); + + auto place = ctx.GetPlace(); + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num1, num2}); + tensor_out->mutable_data(place); // allocate + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; + attrs["nranks"] = 2; + + auto op = f::OpRegistry::CreateOp( + "c_allgather", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + + for (int i = 0; i < 10; i++) { + op->Run(*scope, place); + } + ctx.Wait(); + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + + EXPECT_EQ(out_vec.size(), init.size() * 2); + for (uint32_t i = 0; i < out_vec.size() / 2; i++) { + EXPECT_EQ(out_vec[i], 1.0); + } + for (uint32_t i = out_vec.size() / 2; i < out_vec.size(); i++) { + EXPECT_EQ(out_vec[i], 2.0); + } +} + +TEST(c_allgather, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + + // only support one device, if more than one device, use first default + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + TestHCCLAllGatherOp(&scope, ctx); +} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc index b418fd78611..836ec072598 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc @@ -1,187 +1,187 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_allgather_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_reducescatter_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_allreduce_max); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - for (auto ele : data) { - debugstring += std::to_string(ele) + std::string(","); - } - VLOG(2) << preStr << ":" << std::endl << debugstring; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - - std::vector init; - int rank_id = atoi(getenv("RANK_ID")); - - int num1 = 100; - int num2 = 100; - - for (int64_t i = 0; i < num1 * num2; ++i) { - init.push_back(1.0 + rank_id * 3); - } - PrintDebugInfo("input data", init); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num1, num2}); - ctx.Wait(); - - auto place = ctx.GetPlace(); - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num1, num2}); - tensor_out->mutable_data(place); // allocate - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx"); - attrs["ring_id"] = 0; - - auto op = f::OpRegistry::CreateOp( - "c_allreduce_max", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - - for (int i = 0; i < 10; i++) { - op->Run(*scope, place); - } - ctx.Wait(); - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - - EXPECT_EQ(out_vec.size(), init.size()); - for (uint32_t i = 0; i < out_vec.size(); i++) { - EXPECT_EQ(out_vec[i], 4.0); - } -} - -TEST(c_allreduce_max, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - TestHCCLAllReduceOp(&scope, ctx); -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" +#include "paddle/fluid/operators/collective/c_reducescatter_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_allreduce_max); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + for (auto ele : data) { + debugstring += std::to_string(ele) + std::string(","); + } + VLOG(2) << preStr << ":" << std::endl << debugstring; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + + std::vector init; + int rank_id = atoi(getenv("RANK_ID")); + + int num1 = 100; + int num2 = 100; + + for (int64_t i = 0; i < num1 * num2; ++i) { + init.push_back(1.0 + rank_id * 3); + } + PrintDebugInfo("input data", init); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num1, num2}); + ctx.Wait(); + + auto place = ctx.GetPlace(); + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num1, num2}); + tensor_out->mutable_data(place); // allocate + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; + + auto op = f::OpRegistry::CreateOp( + "c_allreduce_max", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + + for (int i = 0; i < 10; i++) { + op->Run(*scope, place); + } + ctx.Wait(); + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + + EXPECT_EQ(out_vec.size(), init.size()); + for (uint32_t i = 0; i < out_vec.size(); i++) { + EXPECT_EQ(out_vec[i], 4.0); + } +} + +TEST(c_allreduce_max, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + + // only support one device, if more than one device, use first default + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + TestHCCLAllReduceOp(&scope, ctx); +} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc index 0b69da09ad1..402d200b847 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc @@ -1,198 +1,198 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -// Node1: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=1 GLOG_v=4 RANK_ID=1 -// DEVICE_ID=1 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test -// Node2: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=0 GLOG_v=4 RANK_ID=0 -// DEVICE_ID=0 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_allreduce_sum); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - std::cout << preStr << ":" << std::endl << debugstring; - for (auto ele : data) { - std::cout << ele << " "; - } - std::cout << std::endl; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -template -void TestHCCLAllReduceOp(f::Scope* scope, - const p::DeviceContext& ctx, - int iter) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - - int rank_id = atoi(getenv("RANK_ID")); - int num1 = 3; - int num2 = 128; - - std::vector init; - for (int64_t i = 0; i < num1 * num2; ++i) { - init.push_back(static_cast(1.0 + rank_id)); - } - init[0] = static_cast(std::numeric_limits::quiet_NaN()); - PrintDebugInfo("input data", init); - - auto place = ctx.GetPlace(); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num1, num2}); - ctx.Wait(); - - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num1, num2}); - tensor_out->mutable_data(place); // allocate - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx_" + std::to_string(iter)); - attrs["ring_id"] = 0; - attrs["use_calc_stream"] = 1; - - auto op = f::OpRegistry::CreateOp( - "c_allreduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - for (int i = 0; i < 1; i++) { - op->Run(*scope, place); - } - ctx.Wait(); - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - - float diff = static_cast(out_vec[0]) - 65504; - EXPECT_TRUE(diff < 0.1 && diff > -0.1); - EXPECT_EQ(out_vec.size(), init.size()); - for (uint32_t i = 1; i < 10; i++) { - EXPECT_EQ(out_vec[i], static_cast(3.0)); - } -} - -TEST(c_allreduce_sum, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - // only support one device, if more than one device, use first default - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - - TestHCCLAllReduceOp(&scope, ctx, 1); - // TestHCCLAllReduceOp(&scope, ctx, 0); -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +// Node1: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=1 GLOG_v=4 RANK_ID=1 +// DEVICE_ID=1 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test +// Node2: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=0 GLOG_v=4 RANK_ID=0 +// DEVICE_ID=0 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_allreduce_sum); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + std::cout << preStr << ":" << std::endl << debugstring; + for (auto ele : data) { + std::cout << ele << " "; + } + std::cout << std::endl; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +template +void TestHCCLAllReduceOp(f::Scope* scope, + const p::DeviceContext& ctx, + int iter) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + + int rank_id = atoi(getenv("RANK_ID")); + int num1 = 3; + int num2 = 128; + + std::vector init; + for (int64_t i = 0; i < num1 * num2; ++i) { + init.push_back(static_cast(1.0 + rank_id)); + } + init[0] = static_cast(std::numeric_limits::quiet_NaN()); + PrintDebugInfo("input data", init); + + auto place = ctx.GetPlace(); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num1, num2}); + ctx.Wait(); + + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num1, num2}); + tensor_out->mutable_data(place); // allocate + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx_" + std::to_string(iter)); + attrs["ring_id"] = 0; + attrs["use_calc_stream"] = 1; + + auto op = f::OpRegistry::CreateOp( + "c_allreduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + for (int i = 0; i < 1; i++) { + op->Run(*scope, place); + } + ctx.Wait(); + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + + float diff = static_cast(out_vec[0]) - 65504; + EXPECT_TRUE(diff < 0.1 && diff > -0.1); + EXPECT_EQ(out_vec.size(), init.size()); + for (uint32_t i = 1; i < 10; i++) { + EXPECT_EQ(out_vec[i], static_cast(3.0)); + } +} + +TEST(c_allreduce_sum, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + // only support one device, if more than one device, use first default + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + + TestHCCLAllReduceOp(&scope, ctx, 1); + // TestHCCLAllReduceOp(&scope, ctx, 0); +} diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc index ff48ff0abc4..6e7d4375b00 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc @@ -1,180 +1,180 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_broadcast); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_broadcast, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - for (auto ele : data) { - debugstring += std::to_string(ele) + std::string(","); - } - VLOG(2) << preStr << ":" << std::endl << debugstring; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - int num = 2; - std::vector init; - int rank_id = atoi(getenv("RANK_ID")); - - for (int64_t i = 0; i < num * num; ++i) { - init.push_back(1.0 + rank_id); - } - PrintDebugInfo("input data", init); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num, num}); - ctx.Wait(); - - auto place = ctx.GetPlace(); - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num, num}); - tensor_out->mutable_data(place); // allocate - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx"); - attrs["root"] = 0; - attrs["ring_id"] = 0; - - auto op = f::OpRegistry::CreateOp( - "c_broadcast", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - - for (int i = 0; i < 10; i++) { - op->Run(*scope, place); - } - ctx.Wait(); - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - EXPECT_EQ(out_vec.size(), init.size()); - for (uint32_t i = 0; i < out_vec.size(); i++) { - EXPECT_EQ(out_vec[i], 1.0); - } -} - -TEST(c_broadcast, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - TestHCCLBroadcastOp(&scope, ctx); -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_broadcast); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_broadcast, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + for (auto ele : data) { + debugstring += std::to_string(ele) + std::string(","); + } + VLOG(2) << preStr << ":" << std::endl << debugstring; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + int num = 2; + std::vector init; + int rank_id = atoi(getenv("RANK_ID")); + + for (int64_t i = 0; i < num * num; ++i) { + init.push_back(1.0 + rank_id); + } + PrintDebugInfo("input data", init); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num, num}); + ctx.Wait(); + + auto place = ctx.GetPlace(); + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num, num}); + tensor_out->mutable_data(place); // allocate + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx"); + attrs["root"] = 0; + attrs["ring_id"] = 0; + + auto op = f::OpRegistry::CreateOp( + "c_broadcast", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + + for (int i = 0; i < 10; i++) { + op->Run(*scope, place); + } + ctx.Wait(); + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + EXPECT_EQ(out_vec.size(), init.size()); + for (uint32_t i = 0; i < out_vec.size(); i++) { + EXPECT_EQ(out_vec[i], 1.0); + } +} + +TEST(c_broadcast, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + // only support one device, if more than one device, use first default + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + TestHCCLBroadcastOp(&scope, ctx); +} diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc index 817b5b54e1f..ca279e459f1 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc @@ -1,191 +1,191 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_reduce_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_reduce_sum); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - for (auto ele : data) { - debugstring += std::to_string(ele) + std::string(","); - } - VLOG(3) << preStr << ":" << std::endl << debugstring; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - - int rank_id = atoi(getenv("RANK_ID")); - int num1 = 3; - int num2 = 128; - - std::vector init; - for (int64_t i = 0; i < num1 * num2; ++i) { - init.push_back(1.0 + rank_id); - } - PrintDebugInfo("input data", init); - - auto place = ctx.GetPlace(); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num1, num2}); - ctx.Wait(); - - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num1, num2}); - tensor_out->mutable_data(place); // allocate - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx_" + std::to_string(iter)); - attrs["ring_id"] = 0; - int root_id = 0; - attrs["root_id"] = root_id; - - auto op = f::OpRegistry::CreateOp( - "c_reduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - - op->Run(*scope, place); - ctx.Wait(); - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - - EXPECT_EQ(out_vec.size(), init.size()); - for (uint32_t i = 0; i < out_vec.size(); i++) { - if (rank_id == root_id) { - EXPECT_EQ(out_vec[i], 3.0); - } else { - EXPECT_EQ(out_vec[i], init[i]); - } - } -} - -TEST(c_reduce_sum, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - for (int i = 0; i < 2; i++) { - VLOG(2) << "iter num: " << i; - TestHCCLReduceOp(&scope, ctx, i); - } -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_reduce_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_reduce_sum); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + for (auto ele : data) { + debugstring += std::to_string(ele) + std::string(","); + } + VLOG(3) << preStr << ":" << std::endl << debugstring; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + + int rank_id = atoi(getenv("RANK_ID")); + int num1 = 3; + int num2 = 128; + + std::vector init; + for (int64_t i = 0; i < num1 * num2; ++i) { + init.push_back(1.0 + rank_id); + } + PrintDebugInfo("input data", init); + + auto place = ctx.GetPlace(); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num1, num2}); + ctx.Wait(); + + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num1, num2}); + tensor_out->mutable_data(place); // allocate + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx_" + std::to_string(iter)); + attrs["ring_id"] = 0; + int root_id = 0; + attrs["root_id"] = root_id; + + auto op = f::OpRegistry::CreateOp( + "c_reduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + + op->Run(*scope, place); + ctx.Wait(); + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + + EXPECT_EQ(out_vec.size(), init.size()); + for (uint32_t i = 0; i < out_vec.size(); i++) { + if (rank_id == root_id) { + EXPECT_EQ(out_vec[i], 3.0); + } else { + EXPECT_EQ(out_vec[i], init[i]); + } + } +} + +TEST(c_reduce_sum, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + + // only support one device, if more than one device, use first default + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + for (int i = 0; i < 2; i++) { + VLOG(2) << "iter num: " << i; + TestHCCLReduceOp(&scope, ctx, i); + } +} diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc index 3d4ff08cdcc..5f3dad7d738 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc @@ -1,188 +1,188 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/collective/c_allgather_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_reducescatter_op.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#if defined(PADDLE_WITH_ASCEND_CL) -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" -#endif - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP(c_reducescatter); -USE_NO_KERNEL_OP(c_gen_hccl_id); -USE_NO_KERNEL_OP(c_comm_init_hccl); -USE_OP_DEVICE_KERNEL(c_reducescatter, NPU); - -DECLARE_string(selected_npus); - -template -void PrintDebugInfo(const std::string preStr, const std::vector& data) { - std::string debugstring = ""; - for (auto ele : data) { - debugstring += std::to_string(ele) + std::string(","); - } - VLOG(2) << preStr << ":" << std::endl << debugstring; -} - -void PrepareUniqueId(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - std::vector rank_ids{0, 1}; - f::AttributeMap gen_hccl_id; - - std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; - gen_hccl_id["rank"] = rank_id; - gen_hccl_id["endpoint"] = endpointList[rank_id]; - std::vector other_endpoints = { - endpointList[rank_id == 0 ? 
1 : 0]}; - gen_hccl_id["other_endpoints"] = other_endpoints; - - auto out = scope->Var("Out"); - auto id = out->GetMutable(); - - VLOG(3) << "break"; - - auto comm_init_op = f::OpRegistry::CreateOp( - "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); - VLOG(3) << "break"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); - - memcpy(hccl_id, id, 1024); -} - -void Prepare(f::Scope* scope, - const p::DeviceContext& ctx, - HcclRootInfo* hccl_id) { - auto x = scope->Var("X"); - auto id = x->GetMutable(); - - memcpy(id, hccl_id, 1024); - - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - - VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id - << "; rank_id = " << rank_id - << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); - - // std::vector rank_ids{0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["rank_ids"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - // comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp( - "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); -} - -void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) { - // init - auto x = scope->Var("Data"); - auto tensor_x = x->GetMutable(); - - std::vector init; - int num1 = 4; - int num2 = 1; - - for (int64_t i = 0; i < num1 * num2; ++i) { - init.push_back(1.0); - } - PrintDebugInfo("input data", init); - - paddle::framework::TensorFromVector(init, ctx, tensor_x); - tensor_x->Resize({num1, num2}); - - ctx.Wait(); - - auto place = ctx.GetPlace(); - auto out = scope->Var("OutData"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num1, num2}); - tensor_out->mutable_data(place); // allocate - - ctx.Wait(); - - // run - f::AttributeMap attrs; - attrs["tag"] = std::string("tagx"); - attrs["ring_id"] = 0; - attrs["nranks"] = 2; - - auto op = f::OpRegistry::CreateOp( - "c_reducescatter", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); - - int iter_num = 10; - for (int i = 0; i < iter_num; i++) { - op->Run(*scope, place); - ctx.Wait(); - } - - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - - PrintDebugInfo("output data", out_vec); - EXPECT_EQ(out_vec.size(), init.size() / 2); - for (uint32_t i = 0; i < out_vec.size(); i++) { - EXPECT_EQ(out_vec[i], 2.0); - } -} - -TEST(c_reducescatter, NPU) { - f::Scope scope; - HcclRootInfo hccl_id; - - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - - PrepareUniqueId(&scope, ctx, &hccl_id); - Prepare(&scope, ctx, &hccl_id); - TestHCCLReduceScatterOp(&scope, ctx); -} +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef _WIN32 +#include +#endif + +#include + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" +#include "paddle/fluid/operators/collective/c_reducescatter_op.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#if defined(PADDLE_WITH_ASCEND_CL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" +#endif + +namespace f = paddle::framework; +namespace p = paddle::platform; + +USE_OP(c_reducescatter); +USE_NO_KERNEL_OP(c_gen_hccl_id); +USE_NO_KERNEL_OP(c_comm_init_hccl); +USE_OP_DEVICE_KERNEL(c_reducescatter, NPU); + +DECLARE_string(selected_npus); + +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { + std::string debugstring = ""; + for (auto ele : data) { + debugstring += std::to_string(ele) + std::string(","); + } + VLOG(2) << preStr << ":" << std::endl << debugstring; +} + +void PrepareUniqueId(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + std::vector rank_ids{0, 1}; + f::AttributeMap gen_hccl_id; + + std::vector endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"}; + gen_hccl_id["rank"] = rank_id; + gen_hccl_id["endpoint"] = endpointList[rank_id]; + std::vector other_endpoints = { + endpointList[rank_id == 0 ? 
1 : 0]}; + gen_hccl_id["other_endpoints"] = other_endpoints; + + auto out = scope->Var("Out"); + auto id = out->GetMutable(); + + VLOG(3) << "break"; + + auto comm_init_op = f::OpRegistry::CreateOp( + "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id); + VLOG(3) << "break"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); + + memcpy(hccl_id, id, 1024); +} + +void Prepare(f::Scope* scope, + const p::DeviceContext& ctx, + HcclRootInfo* hccl_id) { + auto x = scope->Var("X"); + auto id = x->GetMutable(); + + memcpy(id, hccl_id, 1024); + + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + + VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id + << "; rank_id = " << rank_id + << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID")); + + // std::vector rank_ids{0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["rank_ids"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + // comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = f::OpRegistry::CreateOp( + "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); +} + +void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) { + // init + auto x = scope->Var("Data"); + auto tensor_x = x->GetMutable(); + + std::vector init; + int num1 = 4; + int num2 = 1; + + for (int64_t i = 0; i < num1 * num2; ++i) { + init.push_back(1.0); + } + PrintDebugInfo("input data", init); + + paddle::framework::TensorFromVector(init, ctx, tensor_x); + tensor_x->Resize({num1, num2}); + + ctx.Wait(); + + auto place = ctx.GetPlace(); + auto out = scope->Var("OutData"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num1, num2}); + tensor_out->mutable_data(place); // allocate + + ctx.Wait(); + + // run + f::AttributeMap attrs; + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; + attrs["nranks"] = 2; + + auto op = f::OpRegistry::CreateOp( + "c_reducescatter", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs); + + int iter_num = 10; + for (int i = 0; i < iter_num; i++) { + op->Run(*scope, place); + ctx.Wait(); + } + + std::vector out_vec; + paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + + PrintDebugInfo("output data", out_vec); + EXPECT_EQ(out_vec.size(), init.size() / 2); + for (uint32_t i = 0; i < out_vec.size(); i++) { + EXPECT_EQ(out_vec[i], 2.0); + } +} + +TEST(c_reducescatter, NPU) { + f::Scope scope; + HcclRootInfo hccl_id; + + // only support one device, if more than one device, use first default + p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + + PrepareUniqueId(&scope, ctx, &hccl_id); + Prepare(&scope, ctx, &hccl_id); + TestHCCLReduceScatterOp(&scope, ctx); +} diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc index 615f43bb32c..e7e45b4b6e4 100644 --- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc @@ -1,140 +1,140 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/utils.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" - -namespace paddle { -namespace operators { - -using framework::Tensor; - -template -class FillConstantMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { - public: - FillConstantMKLDNNHandler(Tensor* out, - dnnl::engine engine, - platform::Place cpu_place) - : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { - const auto src0_md = - dnnl::memory::desc({out->numel(), sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - - dnnl::primitive_attr attrs; - attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); - - this->AcquireForwardPrimitiveDescriptor( - attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); - } - - static const dnnl::memory::desc src1_md; -}; - -template -const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( - {1, sizeof(T)}, - platform::MKLDNNGetDataType(), - dnnl::memory::format_tag::ab); - -template -class FillConstantMKLDNNKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx); - } - - void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); - const auto& dnnl_engine = dev_ctx.GetEngine(); - - auto* out = ctx.Output("Out"); - T fill_value = CalculateFillValue(ctx); - - auto shape = GetShape(ctx); - out->Resize(shape); - - FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); - - dnnl::memory constant_value_memory = - dnnl::memory(FillConstantMKLDNNHandler::src1_md, - dnnl_engine, - reinterpret_cast(&fill_value)); - - auto src0_memory_p = handler.AcquireDstMemory(out); - auto fill_constant_p = handler.AcquireForwardPrimitive(); - - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - fill_constant_p->execute(astream, - {{DNNL_ARG_SRC_0, *src0_memory_p}, - {DNNL_ARG_SRC_1, constant_value_memory}, - {DNNL_ARG_DST, *src0_memory_p}}); - astream.wait(); - - // src0_memory_p's md was just to allow the usage of a binary - // primitive as a memset, and now we need to create a real one - out->set_mem_desc({phi::vectorize(shape), - platform::MKLDNNGetDataType(), - platform::GetPlainMKLDNNFormat(shape.size())}); - } - - T CalculateFillValue(const framework::ExecutionContext& ctx) const { - const auto str_value = ctx.Attr("str_value"); - const auto float_value = ctx.Attr("value"); - - T value; - - if (str_value.empty()) { - value = static_cast(float_value); - } else { - // handle NaN/Inf first, which cannot be read from stream - if (str_value == "inf") { - value = static_cast(std::numeric_limits::infinity()); - } else if (str_value == "-inf") { - value = static_cast(-std::numeric_limits::infinity()); - } else if (str_value == "nan") { - value = static_cast(std::numeric_limits::quiet_NaN()); - } else { - std::stringstream convert_stream(str_value); - double tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } - } - - if (ctx.HasInput("ValueTensor")) { - const auto* value_tensor = ctx.Input("ValueTensor"); - PADDLE_ENFORCE_EQ( - 
value_tensor->numel(), - 1, - platform::errors::InvalidArgument( - "When use Tensor as value to set Tensor value in fill_constant, " - "value input(ValueTensor) size must be 1, but got %d", - value_tensor->numel())); - value = value_tensor->data()[0]; - } - - return value; - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_KERNEL(fill_constant, - MKLDNN, - paddle::platform::CPUPlace, - ops::FillConstantMKLDNNKernel); +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/utils.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +template +class FillConstantMKLDNNHandler + : public platform::MKLDNNHandlerNoCachingT { + public: + FillConstantMKLDNNHandler(Tensor* out, + dnnl::engine engine, + platform::Place cpu_place) + : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { + const auto src0_md = + dnnl::memory::desc({out->numel(), sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + + dnnl::primitive_attr attrs; + attrs.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f}); + + this->AcquireForwardPrimitiveDescriptor( + attrs, dnnl::algorithm::binary_add, src0_md, src1_md, src0_md); + } + + static const dnnl::memory::desc src1_md; +}; + +template +const dnnl::memory::desc FillConstantMKLDNNHandler::src1_md( + {1, sizeof(T)}, + platform::MKLDNNGetDataType(), + dnnl::memory::format_tag::ab); + +template +class FillConstantMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx); + } + + void RunKernel(const framework::ExecutionContext& ctx) const { + const auto& dev_ctx = + ctx.template device_context(); + const auto& dnnl_engine = dev_ctx.GetEngine(); + + auto* out = ctx.Output("Out"); + T fill_value = CalculateFillValue(ctx); + + auto shape = GetShape(ctx); + out->Resize(shape); + + FillConstantMKLDNNHandler handler(out, dnnl_engine, ctx.GetPlace()); + + dnnl::memory constant_value_memory = + dnnl::memory(FillConstantMKLDNNHandler::src1_md, + dnnl_engine, + reinterpret_cast(&fill_value)); + + auto src0_memory_p = handler.AcquireDstMemory(out); + auto fill_constant_p = handler.AcquireForwardPrimitive(); + + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + fill_constant_p->execute(astream, + {{DNNL_ARG_SRC_0, *src0_memory_p}, + {DNNL_ARG_SRC_1, constant_value_memory}, + {DNNL_ARG_DST, *src0_memory_p}}); + astream.wait(); + + // src0_memory_p's md was just to allow the usage of a binary + // primitive as a memset, and now we need to create a real one + out->set_mem_desc({phi::vectorize(shape), + platform::MKLDNNGetDataType(), + platform::GetPlainMKLDNNFormat(shape.size())}); + } + + T CalculateFillValue(const framework::ExecutionContext& ctx) const { + const auto str_value = ctx.Attr("str_value"); + const auto float_value = ctx.Attr("value"); + + T value; 
+ + if (str_value.empty()) { + value = static_cast(float_value); + } else { + // handle NaN/Inf first, which cannot be read from stream + if (str_value == "inf") { + value = static_cast(std::numeric_limits::infinity()); + } else if (str_value == "-inf") { + value = static_cast(-std::numeric_limits::infinity()); + } else if (str_value == "nan") { + value = static_cast(std::numeric_limits::quiet_NaN()); + } else { + std::stringstream convert_stream(str_value); + double tmp_value; + convert_stream >> tmp_value; + value = static_cast(tmp_value); + } + } + + if (ctx.HasInput("ValueTensor")) { + const auto* value_tensor = ctx.Input("ValueTensor"); + PADDLE_ENFORCE_EQ( + value_tensor->numel(), + 1, + platform::errors::InvalidArgument( + "When use Tensor as value to set Tensor value in fill_constant, " + "value input(ValueTensor) size must be 1, but got %d", + value_tensor->numel())); + value = value_tensor->data()[0]; + } + + return value; + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_KERNEL(fill_constant, + MKLDNN, + paddle::platform::CPUPlace, + ops::FillConstantMKLDNNKernel); diff --git a/paddle/fluid/operators/unbind_op.cc b/paddle/fluid/operators/unbind_op.cc index 3c435f79859..d059c626fe7 100644 --- a/paddle/fluid/operators/unbind_op.cc +++ b/paddle/fluid/operators/unbind_op.cc @@ -1,90 +1,90 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/unbind_op.h" - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { -using framework::Tensor; - -class UnbindOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::NotFound("Input(X) of UnbindOp is not found.")); - PADDLE_ENFORCE_GE( - ctx->Outputs("Out").size(), - 1UL, - platform::errors::NotFound("Outputs(Out) of UnbindOp is not found.")); - auto in_dims = ctx->GetInputDim("X"); - auto outs_names = ctx->Outputs("Out"); - int axis = ctx->Attrs().Get("axis"); - const size_t outs_number = outs_names.size(); - auto out_dims = UnbindOutsDims(in_dims, axis); - std::vector outs_dims(outs_number, out_dims); - ctx->SetOutputsDim("Out", outs_dims); - for (size_t i = 0; i < outs_number; ++i) { - ctx->ShareLoD("X", "Out", 0, i); - } - } -}; - -class UnbindOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) Input tensor of the split operator."); - AddOutput("Out", "(Tensor) Output tensors of the unbind operator.") - .AsDuplicable(); - AddComment(R"DOC( -Unbind operator - -Remove a tensor dimension. 
- -Example: - Input = [[1,2], - [3,4], - [5,6]] - axis = 0 - Output[0] = [1,2] - Output[1] = [3,4] - Output[2] = [5,6] - - )DOC"); - AddAttr("axis", - "(int, default 0) " - "dimension to remove.") - .SetDefault(0); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(unbind, - ops::UnbindOp, - ops::UnbindOpMaker, - ops::UnbindGradMaker, - ops::UnbindGradMaker); +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/unbind_op.h" + +#include + +#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/phi/core/infermeta_utils.h" +#include "paddle/phi/infermeta/unary.h" + +namespace paddle { +namespace operators { +using framework::Tensor; + +class UnbindOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + platform::errors::NotFound("Input(X) of UnbindOp is not found.")); + PADDLE_ENFORCE_GE( + ctx->Outputs("Out").size(), + 1UL, + platform::errors::NotFound("Outputs(Out) of UnbindOp is not found.")); + auto in_dims = ctx->GetInputDim("X"); + auto outs_names = ctx->Outputs("Out"); + int axis = ctx->Attrs().Get("axis"); + const size_t outs_number = outs_names.size(); + auto out_dims = UnbindOutsDims(in_dims, axis); + std::vector outs_dims(outs_number, out_dims); + ctx->SetOutputsDim("Out", outs_dims); + for (size_t i = 0; i < outs_number; ++i) { + ctx->ShareLoD("X", "Out", 0, i); + } + } +}; + +class UnbindOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) Input tensor of the split operator."); + AddOutput("Out", "(Tensor) Output tensors of the unbind operator.") + .AsDuplicable(); + AddComment(R"DOC( +Unbind operator + +Remove a tensor dimension. + +Example: + Input = [[1,2], + [3,4], + [5,6]] + axis = 0 + Output[0] = [1,2] + Output[1] = [3,4] + Output[2] = [5,6] + + )DOC"); + AddAttr("axis", + "(int, default 0) " + "dimension to remove.") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(unbind, + ops::UnbindOp, + ops::UnbindOpMaker, + ops::UnbindGradMaker, + ops::UnbindGradMaker); diff --git a/paddle/fluid/operators/unbind_op.h b/paddle/fluid/operators/unbind_op.h index 8e6cd391578..082e4584616 100644 --- a/paddle/fluid/operators/unbind_op.h +++ b/paddle/fluid/operators/unbind_op.h @@ -1,54 +1,54 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include // NOLINT -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/strided_memcpy.h" -#include "paddle/fluid/operators/utils.h" - -namespace paddle { -namespace operators { -static inline framework::DDim UnbindOutsDims(const framework::DDim in_dims, - int axis) { - std::vector out_dims; - axis = axis < 0 ? in_dims.size() + axis : axis; - for (int i = 0; i < in_dims.size(); i++) { - if (i != axis) out_dims.push_back(in_dims[i]); - } - return phi::make_ddim(out_dims); -} - -template -class UnbindGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("stack"); - op->SetInput("X", this->OutputGrad("Out")); - op->SetOutput("Y", this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include // NOLINT +#include +#include +#include + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/concat_and_split.h" +#include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/fluid/operators/utils.h" + +namespace paddle { +namespace operators { +static inline framework::DDim UnbindOutsDims(const framework::DDim in_dims, + int axis) { + std::vector out_dims; + axis = axis < 0 ? in_dims.size() + axis : axis; + for (int i = 0; i < in_dims.size(); i++) { + if (i != axis) out_dims.push_back(in_dims[i]); + } + return phi::make_ddim(out_dims); +} + +template +class UnbindGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("stack"); + op->SetInput("X", this->OutputGrad("Out")); + op->SetOutput("Y", this->InputGrad("X")); + op->SetAttrMap(this->Attrs()); + } +}; + +} // namespace operators +} // namespace paddle -- GitLab
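Each file touched by the diff above is removed and re-added with textually identical content; the hunk headers (for example "@@ -1,188 +1,188 @@") show equal line counts on both sides, so the only change is each line's terminator (the stray carriage-return marks visible at the ends of the old lines are what gets dropped). As a rough, self-contained illustration of that kind of normalization (not the tooling actually used to produce this patch), the following C++ sketch rewrites a single file in place, removing every carriage return that immediately precedes a newline. The file path is taken from the command line and error handling is kept minimal.

// crlf_to_lf.cc -- minimal sketch: rewrite one file in place, turning CRLF into LF.
// Illustrative example only; not the script behind this patch.
#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>

int main(int argc, char* argv[]) {
  if (argc != 2) {
    std::fprintf(stderr, "usage: %s <file>\n", argv[0]);
    return 1;
  }
  const std::string path = argv[1];

  // Read the whole file in binary mode so '\r' bytes are preserved as-is.
  std::ifstream in(path, std::ios::binary);
  if (!in) {
    std::fprintf(stderr, "cannot open %s\n", path.c_str());
    return 1;
  }
  std::ostringstream buffer;
  buffer << in.rdbuf();
  const std::string contents = buffer.str();
  in.close();

  // Copy every byte except a '\r' that is immediately followed by '\n'.
  std::string normalized;
  normalized.reserve(contents.size());
  for (size_t i = 0; i < contents.size(); ++i) {
    if (contents[i] == '\r' && i + 1 < contents.size() && contents[i + 1] == '\n') {
      continue;  // drop the CR; the following LF is kept unchanged
    }
    normalized.push_back(contents[i]);
  }

  // Write the normalized bytes back, again in binary mode.
  std::ofstream out(path, std::ios::binary | std::ios::trunc);
  out << normalized;
  return 0;
}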