From 5ad94e7b609a738e23378b828779eff10c7c3781 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 12 Apr 2021 16:12:18 +0800 Subject: [PATCH] fix NPUDeviceContext in all c++ unittest (#32198) * fix NPUDeviceContext in all c++ unittest * refine log Co-authored-by: pangyoki --- .../check_finite_and_unscale_op_npu_test.cc | 8 +- paddle/fluid/operators/assign_op_npu_test.cc | 15 +- .../collective/c_allgather_op_npu_test.cc | 56 ++++--- .../collective/c_allreduce_max_op_npu_test.cc | 43 +++-- .../collective/c_allreduce_sum_op_npu_test.cc | 48 +++--- .../collective/c_broadcast_op_npu_test.cc | 49 +++--- .../collective/c_reduce_sum_op_npu_test.cc | 50 +++--- .../collective/c_reducescatter_op_npu_test.cc | 45 +++--- .../c_sync_calc_stream_op_npu_test.cc | 4 +- .../c_sync_comm_stream_op_npu_test.cc | 6 +- .../collective/recv_v2_op_npu_test.cc | 152 +++++++++--------- .../collective/send_v2_op_npu_test.cc | 129 +++++++-------- .../elementwise/elementwise_op_npu_test.cc | 24 +-- paddle/fluid/operators/expand_op_npu_test.cc | 4 +- paddle/fluid/operators/gather_op_npu_test.cc | 12 +- paddle/fluid/operators/gelu_op_npu_test.cc | 29 ++-- .../fluid/operators/increment_op_npu_test.cc | 19 +-- paddle/fluid/operators/range_op_npu_test.cc | 15 +- .../reduce_ops/reduce_any_op_npu_test.cc | 4 +- paddle/fluid/operators/softmax_op_npu_test.cc | 47 +++--- paddle/fluid/operators/squeeze_op_npu_test.cc | 12 +- .../fluid/operators/transpose_op_npu_test.cc | 8 +- .../fluid/operators/unsqueeze_op_npu_test.cc | 10 +- paddle/fluid/platform/device_context.cc | 2 +- 24 files changed, 380 insertions(+), 411 deletions(-) diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index 1ed188b1593..a80b83f0cbe 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -120,12 +120,12 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) { TEST(check_finite_and_unscale, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } TEST(check_finite_and_unscale, NPU_fp16) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } diff --git a/paddle/fluid/operators/assign_op_npu_test.cc b/paddle/fluid/operators/assign_op_npu_test.cc index 111f4b177b9..792d01a5efe 100644 --- a/paddle/fluid/operators/assign_op_npu_test.cc +++ b/paddle/fluid/operators/assign_op_npu_test.cc @@ -56,10 +56,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, auto out = scope->Var("Out"); auto tensor_out = out->GetMutable(); - auto op = f::OpRegistry::CreateOp(op_type, - {{"X", {"X"}}}, - {{"Out", {"Out"}}}, - {}); + auto op = + f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}}, {{"Out", {"Out"}}}, {}); op->Run(*scope, place); @@ -75,11 +73,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, EXPECT_EQ(out_vec[3], static_cast(4.0)); } - TEST(assign, NPU_fp32) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "assign"); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "assign"); } - - diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc 
b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc index 38f19170af9..f8b30b25516 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc @@ -16,23 +16,23 @@ limitations under the License. */ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #if defined(PADDLE_WITH_ASCEND_CL) @@ -50,25 +50,23 @@ USE_OP_DEVICE_KERNEL(c_allgather, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(2) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; comm_init_attrs["ring_id"] = 0; @@ -90,7 +88,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { std::vector init; int rank_id = atoi(getenv("RANK_ID")); - + int num1 = 1; int num2 = 4; @@ -112,18 +110,18 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx"); - attrs["ring_id"]=0; - attrs["nranks"]=2; - + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; + attrs["nranks"] = 2; + auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + {{"Out", {"Out"}}}, attrs); - for (int i = 0; i < 10; i ++) { + for (int i = 0; i < 10; i++) { op->Run(*scope, place); } ctx.Wait(); - + std::vector out_vec; TensorToVector(*tensor_out, ctx, &out_vec); ctx.Wait(); @@ -139,13 +137,13 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { } } - TEST(c_allgather, NPU) { f::Scope scope; - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + // only support one device, if more than one device, use first default + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - TestHCCLAllGatherOp(&scope, ctx); + Prepare(&scope, *ctx); + TestHCCLAllGatherOp(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc index 3fdc8595067..c00c3a9ea16 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc @@ -16,23 +16,23 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #if defined(PADDLE_WITH_ASCEND_CL) @@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(2) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; @@ -112,13 +110,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) { // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx"); - attrs["ring_id"]=0; + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; auto op = f::OpRegistry::CreateOp("c_allreduce_max", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + {{"Out", {"Out"}}}, attrs); - for (int i = 0; i < 10; i ++) { + for (int i = 0; i < 10; i++) { op->Run(*scope, place); } ctx.Wait(); @@ -139,8 +137,9 @@ TEST(c_allreduce_max, NPU) { f::Scope scope; // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - TestHCCLAllReduceOp(&scope, ctx); + Prepare(&scope, *ctx); + TestHCCLAllReduceOp(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc index 2fff84593c0..2bfab0ee737 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc @@ -16,19 +16,19 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(3) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; @@ -80,7 +78,8 @@ void Prepare(f::Scope* scope, const p::DeviceContext& ctx){ ctx.Wait(); } -void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { +void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, + int iter) { // init auto x = scope->Var("X"); auto tensor_x = x->GetMutable(); @@ -109,15 +108,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx_"+ std::to_string(iter)); - attrs["ring_id"]=0; + attrs["tag"] = std::string("tagx_" + std::to_string(iter)); + attrs["ring_id"] = 0; - auto op = f::OpRegistry::CreateOp("c_allreduce_sum", - {{"X", {"X"}}}, - {{"Out", {"Out"}}}, - attrs); + auto op = f::OpRegistry::CreateOp("c_allreduce_sum", {{"X", {"X"}}}, + {{"Out", {"Out"}}}, attrs); - for (int i = 0; i < 10; i ++) { + for (int i = 0; i < 10; i++) { op->Run(*scope, place); } ctx.Wait(); @@ -138,11 +135,12 @@ TEST(c_allreduce_sum, NPU) { f::Scope scope; // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - for(int i = 0; i < 1; i ++){ + Prepare(&scope, *ctx); + for (int i = 0; i < 1; i++) { VLOG(2) << "iter num: " << i; - TestHCCLAllReduceOp(&scope, ctx, i); + TestHCCLAllReduceOp(&scope, *ctx, i); } } diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc index 66158e5ff28..ccffe36681b 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc @@ -16,19 +16,19 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" @@ -47,25 +47,23 @@ USE_OP_DEVICE_KERNEL(c_broadcast, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(2) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; comm_init_attrs["ring_id"] = 0; @@ -87,7 +85,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { int num = 2; std::vector init; int rank_id = atoi(getenv("RANK_ID")); - + for (int64_t i = 0; i < num * num; ++i) { init.push_back(1.0 + rank_id); } @@ -106,18 +104,18 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx"); - attrs["root"]=0; - attrs["ring_id"]=0; + attrs["tag"] = std::string("tagx"); + attrs["root"] = 0; + attrs["ring_id"] = 0; auto op = f::OpRegistry::CreateOp("c_broadcast", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + {{"Out", {"Out"}}}, attrs); - for (int i = 0; i < 10; i ++) { + for (int i = 0; i < 10; i++) { op->Run(*scope, place); } ctx.Wait(); - + std::vector out_vec; TensorToVector(*tensor_out, ctx, &out_vec); ctx.Wait(); @@ -132,9 +130,10 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { TEST(c_broadcast, NPU) { f::Scope scope; - // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + // only support one device, if more than one device, use first default + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - TestHCCLBroadcastOp(&scope, ctx); + Prepare(&scope, *ctx); + TestHCCLBroadcastOp(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc index 36ec6d155a2..8d1da15c8c7 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc @@ -16,19 +16,19 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" #include "paddle/fluid/operators/collective/c_reduce_op.h" @@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(3) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; @@ -109,15 +107,13 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx_"+ std::to_string(iter)); - attrs["ring_id"]=0; + attrs["tag"] = std::string("tagx_" + std::to_string(iter)); + attrs["ring_id"] = 0; int root_id = 0; - attrs["root_id"]=root_id; + attrs["root_id"] = root_id; - auto op = f::OpRegistry::CreateOp("c_reduce_sum", - {{"X", {"X"}}}, - {{"Out", {"Out"}}}, - attrs); + auto op = f::OpRegistry::CreateOp("c_reduce_sum", {{"X", {"X"}}}, + {{"Out", {"Out"}}}, attrs); op->Run(*scope, place); ctx.Wait(); @@ -130,10 +126,9 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { EXPECT_EQ(out_vec.size(), init.size()); for (uint32_t i = 0; i < out_vec.size(); i++) { - if(rank_id == root_id){ + if (rank_id == root_id) { EXPECT_EQ(out_vec[i], 3.0); - } - else{ + } else { EXPECT_EQ(out_vec[i], init[i]); } } @@ -143,11 +138,12 @@ TEST(c_reduce_sum, NPU) { f::Scope scope; // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - for(int i = 0; i < 2; i ++){ + Prepare(&scope, *ctx); + for (int i = 0; i < 2; i++) { VLOG(2) << "iter num: " << i; - TestHCCLReduceOp(&scope, ctx, i); + TestHCCLReduceOp(&scope, *ctx, i); } } diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc index 1c21ab19b95..d116b6a7d8c 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc @@ -16,23 +16,23 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/operators/collective/c_broadcast_op.h" -#include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" +#include "paddle/fluid/operators/collective/c_allreduce_op.h" +#include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #if defined(PADDLE_WITH_ASCEND_CL) @@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_reducescatter, NPU); DECLARE_string(selected_npus); -template -void PrintDebugInfo(const std::string preStr, const std::vector &data){ +template +void PrintDebugInfo(const std::string preStr, const std::vector& data) { std::string debugstring = ""; for (auto ele : data) { debugstring += std::to_string(ele) + std::string(","); } - VLOG(2) << preStr << ":" << std::endl < rank_ids{0, 1}; f::AttributeMap comm_init_attrs; @@ -112,15 +110,15 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) { // run f::AttributeMap attrs; - attrs["tag"]=std::string("tagx"); - attrs["ring_id"]=0; - attrs["nranks"]=2; + attrs["tag"] = std::string("tagx"); + attrs["ring_id"] = 0; + attrs["nranks"] = 2; auto op = f::OpRegistry::CreateOp("c_reducescatter", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + {{"Out", {"Out"}}}, attrs); int iter_num = 10; - for (int i = 0; i < iter_num; i ++) { + for (int i = 0; i < iter_num; i++) { op->Run(*scope, place); } ctx.Wait(); @@ -140,8 +138,9 @@ TEST(c_reducescatter, NPU) { f::Scope scope; // only support one device, if more than one device, use first default - p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); + auto* ctx = p::DeviceContextPool::Instance().Get( + p::NPUPlace(atoi(FLAGS_selected_npus.c_str()))); - Prepare(&scope, ctx); - TestHCCLReduceScatterOp(&scope, ctx); + Prepare(&scope, *ctx); + TestHCCLReduceScatterOp(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc index 4b1f7bb3401..94d89356d4a 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc @@ -102,6 +102,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TEST(c_sync_calc_stream, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc index 371bcc4cfcf..a8e61398ca5 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc @@ -124,8 +124,8 @@ TEST(c_broadcast, NPU) { f::Scope scope; char* npu_id = getenv("FLAGS_selected_npus"); - p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id))); + auto* ctx = 
p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id))); - Prepare(&scope, ctx); - TestHCCLBroadcastOp(&scope, ctx); + Prepare(&scope, *ctx); + TestHCCLBroadcastOp(&scope, *ctx); } diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc index 727d8be5a8f..0067ebcb55d 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc @@ -16,19 +16,19 @@ limitations under the License. */ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" #include "paddle/fluid/operators/collective/recv_v2_op.h" @@ -45,80 +45,80 @@ USE_OP(recv_v2); USE_NO_KERNEL_OP(c_comm_init_hcom); USE_OP_DEVICE_KERNEL(recv_v2, NPU); -void Prepare(f::Scope* scope, const p::DeviceContext& ctx){ - - std::string rank_table_file = getenv("RANK_TABLE_FILE"); - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - int src_rank = atoi(getenv("SRC_RANK")); - int dest_rank = atoi(getenv("DEST_RANK")); - VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" < rank_ids = {0,1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["nranks"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs); - VLOG(3) << "CreateOp c_comm_init_hcom"; - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); +void Prepare(f::Scope* scope, const p::DeviceContext& ctx) { + std::string rank_table_file = getenv("RANK_TABLE_FILE"); + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + int src_rank = atoi(getenv("SRC_RANK")); + int dest_rank = atoi(getenv("DEST_RANK")); + VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank" + << dest_rank; + + std::vector rank_ids = {0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["nranks"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = + f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs); + VLOG(3) << "CreateOp c_comm_init_hcom"; + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); } -void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx){ - std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl; - - int num = atoi(getenv("DATA_SIZE")); - EXPECT_GT(num, 0); - EXPECT_LT(num, 1 << 15); - int rank_id = atoi(getenv("RANK_ID")); - VLOG(3) << "rank_id:" << rank_id<Var("Out"); - auto tensor_out = out->GetMutable(); - tensor_out->Resize({num, num}); - tensor_out->mutable_data(place); // allocate - - ctx.Wait(); - - f::AttributeMap attrs; - attrs["tag"]=std::string("srtest"); - attrs["peer"]=atoi(getenv("SRC_RANK")); - attrs["ring_id"]=0; - attrs["srTag"]=0; - std::vector out_shape; - 
out_shape.push_back(num); - out_shape.push_back(num); - attrs["out_shape"]=out_shape; - - auto op = f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs); - VLOG(3) << "CreateOp recv_v2"; - - for (int i = 0; i < 10; i ++) { - op->Run(*scope, place); - } - VLOG(3) << "Run op recv_v2"; - std::vector out_vec; - TensorToVector(*tensor_out, ctx, &out_vec); - ctx.Wait(); - std::vector init(num*num, 1.0 * atoi(getenv("DEST_RANK"))); - EXPECT_EQ(out_vec == init, true); +void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx) { + std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl; + + int num = atoi(getenv("DATA_SIZE")); + EXPECT_GT(num, 0); + EXPECT_LT(num, 1 << 15); + int rank_id = atoi(getenv("RANK_ID")); + VLOG(3) << "rank_id:" << rank_id << std::endl; + + ctx.Wait(); + auto place = ctx.GetPlace(); + auto out = scope->Var("Out"); + auto tensor_out = out->GetMutable(); + tensor_out->Resize({num, num}); + tensor_out->mutable_data(place); // allocate + + ctx.Wait(); + + f::AttributeMap attrs; + attrs["tag"] = std::string("srtest"); + attrs["peer"] = atoi(getenv("SRC_RANK")); + attrs["ring_id"] = 0; + attrs["srTag"] = 0; + std::vector out_shape; + out_shape.push_back(num); + out_shape.push_back(num); + attrs["out_shape"] = out_shape; + + auto op = f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs); + VLOG(3) << "CreateOp recv_v2"; + + for (int i = 0; i < 10; i++) { + op->Run(*scope, place); + } + VLOG(3) << "Run op recv_v2"; + std::vector out_vec; + TensorToVector(*tensor_out, ctx, &out_vec); + ctx.Wait(); + std::vector init(num * num, 1.0 * atoi(getenv("DEST_RANK"))); + EXPECT_EQ(out_vec == init, true); } - -TEST(recv_v2, NPU){ - f::Scope scope; - char * npu_id=getenv("FLAGS_selected_npus"); - VLOG(3) << "Select npu:" << npu_id; - p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id))); - VLOG(3) << "Place over"; - Prepare(&scope, ctx); - VLOG(3) << "Prepare over"; - TestHcomRecvOp(&scope, ctx); - VLOG(3) << "Test over"; +TEST(recv_v2, NPU) { + f::Scope scope; + char* npu_id = getenv("FLAGS_selected_npus"); + VLOG(3) << "Select npu:" << npu_id; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id))); + VLOG(3) << "Place over"; + Prepare(&scope, *ctx); + VLOG(3) << "Prepare over"; + TestHcomRecvOp(&scope, *ctx); + VLOG(3) << "Test over"; } diff --git a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc index 7916d155ee7..3bb208e0441 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc @@ -16,18 +16,18 @@ limitations under the License. 
*/ #include #endif +#include #include #include // NOLINT #include -#include #include "gtest/gtest.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" #include "paddle/fluid/operators/collective/send_v2_op.h" @@ -44,68 +44,69 @@ USE_OP(send_v2); USE_NO_KERNEL_OP(c_comm_init_hcom); USE_OP_DEVICE_KERNEL(send_v2, NPU); -void Prepare(f::Scope* scope, const p::DeviceContext& ctx){ - - std::string rank_table_file = getenv("RANK_TABLE_FILE"); - int rank_id = atoi(getenv("RANK_ID")); - int device_id = atoi(getenv("DEVICE_ID")); - int src_rank = atoi(getenv("SRC_RANK")); - int dest_rank = atoi(getenv("DEST_RANK")); - VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" < rank_ids = {0, 1}; - f::AttributeMap comm_init_attrs; - comm_init_attrs["ring_id"] = 0; - comm_init_attrs["nranks"] = 2; - comm_init_attrs["rank"] = rank_id; - comm_init_attrs["device_id"] = device_id; - comm_init_attrs["rank_ids"] = rank_ids; - auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs); - auto place = ctx.GetPlace(); - comm_init_op->Run(*scope, place); - ctx.Wait(); +void Prepare(f::Scope* scope, const p::DeviceContext& ctx) { + std::string rank_table_file = getenv("RANK_TABLE_FILE"); + int rank_id = atoi(getenv("RANK_ID")); + int device_id = atoi(getenv("DEVICE_ID")); + int src_rank = atoi(getenv("SRC_RANK")); + int dest_rank = atoi(getenv("DEST_RANK")); + VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank" + << dest_rank; + + std::vector rank_ids = {0, 1}; + f::AttributeMap comm_init_attrs; + comm_init_attrs["ring_id"] = 0; + comm_init_attrs["nranks"] = 2; + comm_init_attrs["rank"] = rank_id; + comm_init_attrs["device_id"] = device_id; + comm_init_attrs["rank_ids"] = rank_ids; + auto comm_init_op = + f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs); + auto place = ctx.GetPlace(); + comm_init_op->Run(*scope, place); + ctx.Wait(); } -void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx){ - std::cout<< "BEGIN TEST:"<< __FUNCTION__ <Var("X"); - auto tensor_x = x->GetMutable(); - int num = atoi(getenv("DATA_SIZE"));; - EXPECT_GT(num, 0); - EXPECT_LT(num, 1 << 15); - std::vector init(num*num, 1.0 * atoi(getenv("DEST_RANK"))); - int rank_id = atoi(getenv("RANK_ID")); - VLOG(3)<<"rank id:"<Resize({num, num}); - ctx.Wait(); - auto place = ctx.GetPlace(); - ctx.Wait(); - - f::AttributeMap attrs; - attrs["tag"]=std::string("srtest"); - attrs["peer"]=atoi(getenv("DEST_RANK")); - attrs["ring_id"]=0; - attrs["srTag"]=0; - - auto op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs); - - for (int i = 0; i < 10; i ++) { - op->Run(*scope, place); - } - VLOG(3)<<"send run over"; - ctx.Wait(); +void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx) { + std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl; + auto x = scope->Var("X"); + auto tensor_x = x->GetMutable(); + int num = atoi(getenv("DATA_SIZE")); + + EXPECT_GT(num, 0); + EXPECT_LT(num, 1 << 15); + std::vector init(num * num, 1.0 * atoi(getenv("DEST_RANK"))); + int rank_id = atoi(getenv("RANK_ID")); + VLOG(3) << "rank id:" << rank_id; + TensorFromVector(init, ctx, tensor_x); + 
tensor_x->Resize({num, num}); + ctx.Wait(); + auto place = ctx.GetPlace(); + ctx.Wait(); + + f::AttributeMap attrs; + attrs["tag"] = std::string("srtest"); + attrs["peer"] = atoi(getenv("DEST_RANK")); + attrs["ring_id"] = 0; + attrs["srTag"] = 0; + + auto op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs); + + for (int i = 0; i < 10; i++) { + op->Run(*scope, place); + } + VLOG(3) << "send run over"; + ctx.Wait(); } -TEST(send_v2, NPU){ - f::Scope scope; - char * npu_id=getenv("FLAGS_selected_npus"); - VLOG(3) << "Select npu:" << npu_id; - p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id))); - VLOG(3) << "Place over"; - Prepare(&scope, ctx); - VLOG(3) << "Prepare over"; - TestHcomSendOp(&scope, ctx); - VLOG(3) << "Test over"; - +TEST(send_v2, NPU) { + f::Scope scope; + char* npu_id = getenv("FLAGS_selected_npus"); + VLOG(3) << "Select npu:" << npu_id; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id))); + VLOG(3) << "Place over"; + Prepare(&scope, *ctx); + VLOG(3) << "Prepare over"; + TestHcomSendOp(&scope, *ctx); + VLOG(3) << "Test over"; } diff --git a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc index e6b5e5f8b78..f06dbd26873 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc +++ b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc @@ -38,7 +38,7 @@ USE_OP(elementwise_sub); USE_OP_DEVICE_KERNEL(elementwise_sub, NPU); template -void Compare(f::Scope* scope, const p::DeviceContext& ctx, +void Compare(f::Scope *scope, const p::DeviceContext &ctx, std::string op_type) { // init auto x = scope->Var("X"); @@ -90,7 +90,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, } template -void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, +void CompareGrad(f::Scope *scope, const p::DeviceContext &ctx, std::string op_type) { // init auto dout = scope->Var("DOut"); @@ -154,30 +154,30 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, TEST(elementwise_add, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "elementwise_add"); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "elementwise_add"); } TEST(elementwise_sub, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "elementwise_sub"); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "elementwise_sub"); } TEST(elementwise_sub, NPU_fp16) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "elementwise_sub"); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "elementwise_sub"); } TEST(elementwise_sub_grad, NPU) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx, "elementwise_sub_grad"); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx, "elementwise_sub_grad"); } TEST(elementwise_add_grad, NPU) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx, "elementwise_add_grad"); + auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx, "elementwise_add_grad"); } diff --git a/paddle/fluid/operators/expand_op_npu_test.cc b/paddle/fluid/operators/expand_op_npu_test.cc index 95f7865a8a3..880eb341f20 100644 --- a/paddle/fluid/operators/expand_op_npu_test.cc +++ 
b/paddle/fluid/operators/expand_op_npu_test.cc @@ -69,6 +69,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TEST(expand, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc index de067e45585..31e19d8f600 100644 --- a/paddle/fluid/operators/gather_op_npu_test.cc +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -152,18 +152,18 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, TEST(gather, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "gather"); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "gather"); } TEST(gather, NPU_fp16) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "gather"); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "gather"); } TEST(gather_grad, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx, "gather_grad"); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx, "gather_grad"); } diff --git a/paddle/fluid/operators/gelu_op_npu_test.cc b/paddle/fluid/operators/gelu_op_npu_test.cc index d0846e5c90e..830dcd59839 100644 --- a/paddle/fluid/operators/gelu_op_npu_test.cc +++ b/paddle/fluid/operators/gelu_op_npu_test.cc @@ -59,8 +59,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { // run auto place = ctx.GetPlace(); - auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}}, {{"Out", {"Out"}}}, + attrs); op->Run(*scope, place); ctx.Wait(); @@ -76,8 +76,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { ctx.Wait(); gettimeofday(&end, NULL); - int micros = (((end.tv_sec - start.tv_sec) * 1000000) + - end.tv_usec) - (start.tv_usec); + int micros = + (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec); printf("used time: %d\n", micros / 100); // eval value @@ -124,8 +124,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { auto place = ctx.GetPlace(); auto op = f::OpRegistry::CreateOp("gelu_grad", - {{"Out@GRAD", {"DOut"}}, {"X", {"X"}}}, - {{"X@GRAD", {"DX"}}}, attrs); + {{"Out@GRAD", {"DOut"}}, {"X", {"X"}}}, + {{"X@GRAD", {"DX"}}}, attrs); op->Run(*scope, place); ctx.Wait(); @@ -141,8 +141,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { ctx.Wait(); gettimeofday(&end, NULL); - int micros = (((end.tv_sec - start.tv_sec) * 1000000) + - end.tv_usec) - (start.tv_usec); + int micros = + (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec); printf("used time: %d\n", micros / 100); // eval value @@ -156,14 +156,13 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { } TEST(gelu, NPU_fp32) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } TEST(gelu_grad, NPU) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx); } - diff --git 
a/paddle/fluid/operators/increment_op_npu_test.cc b/paddle/fluid/operators/increment_op_npu_test.cc index f4ce9ffe40b..bde349b0a33 100644 --- a/paddle/fluid/operators/increment_op_npu_test.cc +++ b/paddle/fluid/operators/increment_op_npu_test.cc @@ -54,10 +54,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, auto out = scope->Var("Out"); auto tensor_out = out->GetMutable(); - f::AttributeMap attr_input = { {"step", static_cast(2.0)} }; + f::AttributeMap attr_input = {{"step", static_cast(2.0)}}; auto op = f::OpRegistry::CreateOp("increment", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, - attr_input); + {{"Out", {"Out"}}}, attr_input); op->Run(*scope, place); @@ -70,16 +69,14 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, EXPECT_EQ(out_vec[0], static_cast(3.0)); } - TEST(increment, NPU_fp32) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "increment"); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "increment"); } TEST(increment, NPU_fp64) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "increment"); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "increment"); } - diff --git a/paddle/fluid/operators/range_op_npu_test.cc b/paddle/fluid/operators/range_op_npu_test.cc index f4ec2fe7158..f2f395314c0 100644 --- a/paddle/fluid/operators/range_op_npu_test.cc +++ b/paddle/fluid/operators/range_op_npu_test.cc @@ -67,10 +67,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, auto tensor_out = out->GetMutable(); // run - auto op = f::OpRegistry::CreateOp(op_type, {{"Start", {"Start"}}, - {"End", {"End"}}, - {"Step", {"Step"}}}, - {{"Out", {"Out"}}}, {}); + auto op = f::OpRegistry::CreateOp( + op_type, {{"Start", {"Start"}}, {"End", {"End"}}, {"Step", {"Step"}}}, + {{"Out", {"Out"}}}, {}); op->Run(*scope, place); @@ -86,10 +85,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, EXPECT_EQ(static_cast(out_vec[4]), static_cast(9.0)); } - TEST(range, NPU) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "range"); + f::Scope scope; + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx, "range"); } - diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc index d408ff3988f..1eeeb5e1f8a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc @@ -78,6 +78,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TEST(reduce_any, NPU) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } diff --git a/paddle/fluid/operators/softmax_op_npu_test.cc b/paddle/fluid/operators/softmax_op_npu_test.cc index 89357705ce0..d20b3ac04bf 100644 --- a/paddle/fluid/operators/softmax_op_npu_test.cc +++ b/paddle/fluid/operators/softmax_op_npu_test.cc @@ -21,11 +21,10 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/dropout_op.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" -#include "paddle/fluid/framework/tensor_util.h" - namespace f = paddle::framework; namespace p = paddle::platform; @@ -59,15 +58,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { // run int axis = 1; f::AttributeMap attrs = { - {"axis", axis}, - {"use_cudnn", false}, - {"use_mkldnn", false}, - {"mkldnn_data_type", std::string("float32")}, - {"is_test", false}, }; + {"axis", axis}, {"use_cudnn", false}, + {"use_mkldnn", false}, {"mkldnn_data_type", std::string("float32")}, + {"is_test", false}, + }; - auto op = - f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + auto op = f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}}, + {{"Out", {"Out"}}}, attrs); op->Run(*scope, place); ctx.Wait(); @@ -76,7 +73,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TensorToVector(*tensor_out, ctx, &out_vec); for (int i = 0; i < static_cast(out_vec.size()); ++i) { - VLOG(3) << "out_vec[" << i << "] : "<< out_vec[i]; + VLOG(3) << "out_vec[" << i << "] : " << out_vec[i]; } ctx.Wait(); @@ -84,7 +81,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { EXPECT_EQ((uint32_t)out_vec.size(), (uint32_t)(6)); } - template void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { // init @@ -128,16 +124,15 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { attrs = { {"name", std::string("softmax_grad")}, {"axis", static_cast(0)}, - {"use_cudnn", false}, - {"use_mkldnn", false}, - {"mkldnn_data_type", std::string("float32")}, - {"is_test", false}, - {"data_format", std::string("AnyLayout")}, }; - auto op = - f::OpRegistry::CreateOp("softmax_grad", - {{"Out", {"Out"}}, - {"Out@GRAD", {"DOut"}}}, - {{"X@GRAD", {"DX"}}}, attrs); + {"use_cudnn", false}, + {"use_mkldnn", false}, + {"mkldnn_data_type", std::string("float32")}, + {"is_test", false}, + {"data_format", std::string("AnyLayout")}, + }; + auto op = f::OpRegistry::CreateOp("softmax_grad", + {{"Out", {"Out"}}, {"Out@GRAD", {"DOut"}}}, + {{"X@GRAD", {"DX"}}}, attrs); auto place = ctx.GetPlace(); op->Run(*scope, place); @@ -164,12 +159,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { TEST(softmax, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } TEST(softmax_grad, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx); } diff --git a/paddle/fluid/operators/squeeze_op_npu_test.cc b/paddle/fluid/operators/squeeze_op_npu_test.cc index 9b0464fc223..1de7ca8c7bd 100644 --- a/paddle/fluid/operators/squeeze_op_npu_test.cc +++ b/paddle/fluid/operators/squeeze_op_npu_test.cc @@ -64,9 +64,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { axis.push_back(2); f::AttributeMap attrs = {{"axes", axis}}; - auto op = - f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + auto op = f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}}, + {{"Out", {"Out"}}}, attrs); op->Run(*scope, place); ctx.Wait(); @@ -74,7 +73,7 @@ 
void Compare(f::Scope* scope, const p::DeviceContext& ctx) { EXPECT_EQ((uint32_t)tensor_out->dims().size(), uint32_t(2)); EXPECT_EQ((uint32_t)tensor_out->dims()[0], uint32_t(dim0)); EXPECT_EQ((uint32_t)tensor_out->dims()[1], uint32_t(dim1)); - + std::vector out_vec; TensorToVector(*tensor_out, ctx, &out_vec); for (uint32_t i = 0; i < out_vec.size(); i++) { @@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TEST(squeeze, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } - diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc index 36f7a695358..f6712814e1e 100644 --- a/paddle/fluid/operators/transpose_op_npu_test.cc +++ b/paddle/fluid/operators/transpose_op_npu_test.cc @@ -126,12 +126,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { TEST(transpose2, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } TEST(transpose2_grad, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + CompareGrad(&scope, *ctx); } diff --git a/paddle/fluid/operators/unsqueeze_op_npu_test.cc b/paddle/fluid/operators/unsqueeze_op_npu_test.cc index 6b53cc328a1..a145c914a86 100644 --- a/paddle/fluid/operators/unsqueeze_op_npu_test.cc +++ b/paddle/fluid/operators/unsqueeze_op_npu_test.cc @@ -63,9 +63,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { axis.push_back(1); f::AttributeMap attrs = {{"axes", axis}}; - auto op = - f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}}, - {{"Out", {"Out"}}}, attrs); + auto op = f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}}, + {{"Out", {"Out"}}}, attrs); op->Run(*scope, place); ctx.Wait(); @@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { TEST(unsqueeze, NPU_fp32) { f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx); + auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); + Compare(&scope, *ctx); } - diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index e28ace52167..71eee4fe121 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -254,7 +254,7 @@ NPUDeviceContext::~NPUDeviceContext() { void NPUDeviceContext::Wait() const { platform::RecordEvent record_event("NPUDeviceContext/wait"); - VLOG(4) << "NPU context Wait"; + VLOG(4) << "NPU context(" << this << ") Wait"; stream_->Wait(); } -- GitLab
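Note: the recurring change in this patch is to stop constructing a local p::NPUDeviceContext inside each test and instead borrow the framework-owned context for the place from p::DeviceContextPool. A minimal sketch of the new pattern follows, assuming an Ascend build (PADDLE_WITH_ASCEND_CL) with NPU device 0 available; the helper function name is illustrative only and not part of the patch.

// Sketch of the pooled-context pattern used throughout this patch.
// DeviceContextPool owns the context (and its stream), so tests hold a
// pointer into the pool and dereference it instead of creating and
// destroying their own NPUDeviceContext.
#include "paddle/fluid/platform/device_context.h"

namespace p = paddle::platform;

void RunWithPooledContext() {  // illustrative helper, not from the patch
  // Old style (removed by this patch):
  //   p::NPUDeviceContext ctx(p::NPUPlace(0));
  // New style: fetch the shared, framework-owned context for this place.
  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
  ctx->Wait();  // synchronize the device stream before inspecting outputs
}

Because the pooled context is the same one the op kernels obtain at run time, waiting on it synchronizes the same stream the kernels used, which is presumably why the locally constructed contexts were removed from these tests.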