Unverified · Commit 5ad94e7b authored by Leo Chen, committed by GitHub

fix NPUDeviceContext in all c++ unittest (#32198)

* fix NPUDeviceContext in all c++ unittest

* refine log
Co-authored-by: pangyoki <pangyoki@126.com>
Parent 054f8e7a
@@ -120,12 +120,12 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) {
 TEST(check_finite_and_unscale, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }

 TEST(check_finite_and_unscale, NPU_fp16) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<p::float16>(&scope, ctx);
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<p::float16>(&scope, *ctx);
 }
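The change repeated throughout this commit is mechanical: instead of constructing a private `p::NPUDeviceContext` (which owns its own stream), each test now fetches the process-wide context from `DeviceContextPool`, the same context `op->Run()` resolves internally, so `ctx.Wait()` and `TensorToVector` synchronize the stream the kernels actually ran on. A minimal sketch of the pattern, assuming the `f`/`p` namespace aliases these test files already define (`Compare` stands in for the per-file test helper):

```cpp
// Sketch only, not a complete test file.
namespace f = paddle::framework;
namespace p = paddle::platform;

void RunOnPooledContext(f::Scope* scope) {
  // Before: p::NPUDeviceContext ctx(p::NPUPlace(0));
  // That created a second context with its own stream, so Wait() could
  // synchronize a stream the kernel never ran on.
  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
  Compare<float>(scope, *ctx);  // dereference: the helpers take a const ref
}
```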
@@ -56,10 +56,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   auto out = scope->Var("Out");
   auto tensor_out = out->GetMutable<f::LoDTensor>();

-  auto op = f::OpRegistry::CreateOp(op_type,
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    {});
+  auto op =
+      f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}}, {{"Out", {"Out"}}}, {});

   op->Run(*scope, place);
@@ -75,11 +73,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   EXPECT_EQ(out_vec[3], static_cast<T>(4.0));
 }

 TEST(assign, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "assign");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "assign");
 }
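The reflowed `f::OpRegistry::CreateOp` calls in these hunks all share one four-argument shape: op type, input-slot map, output-slot map, attribute map. A hedged illustration of a single call (slot and variable names mirror the tests above; `scope` and `place` are assumed set up as in the test bodies):

```cpp
// Illustrative sketch; assumes namespace f = paddle::framework.
auto op = f::OpRegistry::CreateOp(
    "assign",            // registered operator type
    {{"X", {"X"}}},      // input slot "X"  -> scope variable named "X"
    {{"Out", {"Out"}}},  // output slot "Out" -> scope variable named "Out"
    {});                 // assign takes no attributes
op->Run(scope, place);   // dispatches to the kernel registered for place
```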
@@ -16,23 +16,23 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
+#include "paddle/fluid/operators/collective/c_allreduce_op.h"
+#include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"

 #if defined(PADDLE_WITH_ASCEND_CL)
@@ -50,25 +50,23 @@ USE_OP_DEVICE_KERNEL(c_allgather, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
   comm_init_attrs["ring_id"] = 0;
@@ -90,7 +88,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
   std::vector<float> init;
   int rank_id = atoi(getenv("RANK_ID"));

   int num1 = 1;
   int num2 = 4;
@@ -112,18 +110,18 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["ring_id"]=0;
-  attrs["nranks"]=2;
+  attrs["tag"] = std::string("tagx");
+  attrs["ring_id"] = 0;
+  attrs["nranks"] = 2;

   auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}},
                                     {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();

   std::vector<float> out_vec;
   TensorToVector(*tensor_out, ctx, &out_vec);
   ctx.Wait();
@@ -139,13 +137,13 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
   }
 }

 TEST(c_allgather, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLAllGatherOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLAllGatherOp(&scope, *ctx);
 }
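All of the HCCL collective tests in this commit share one harness: rank and device come from environment variables, a `c_comm_init_hcom` op builds a two-rank communicator, and the collective then runs repeatedly before the output is copied back and checked. A condensed sketch of that flow (attribute and environment-variable names are the ones these tests use; tensor setup and assertions omitted):

```cpp
// Condensed harness shared by the collective tests; f/p aliases as above.
void PrepareAndRunAllGather(f::Scope* scope, const p::DeviceContext& ctx) {
  int rank_id = atoi(getenv("RANK_ID"));      // this process's rank
  int device_id = atoi(getenv("DEVICE_ID"));  // NPU card to bind

  // One-time communicator setup over ranks {0, 1}.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["nranks"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;
  comm_init_attrs["rank_ids"] = std::vector<int>{0, 1};
  auto comm_init_op =
      f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
  comm_init_op->Run(*scope, ctx.GetPlace());
  ctx.Wait();

  // The collective itself, repeated as in the tests above.
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;
  attrs["nranks"] = 2;
  auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}},
                                    {{"Out", {"Out"}}}, attrs);
  for (int i = 0; i < 10; i++) {
    op->Run(*scope, ctx.GetPlace());
  }
  ctx.Wait();  // synchronize before TensorToVector reads the result back
}
```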
@@ -16,23 +16,23 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
+#include "paddle/fluid/operators/collective/c_allreduce_op.h"
+#include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"

 #if defined(PADDLE_WITH_ASCEND_CL)
@@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
@@ -112,13 +110,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx");
+  attrs["ring_id"] = 0;

   auto op = f::OpRegistry::CreateOp("c_allreduce_max", {{"X", {"X"}}},
                                     {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();
@@ -139,8 +137,9 @@ TEST(c_allreduce_max, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLAllReduceOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLAllReduceOp(&scope, *ctx);
 }
@@ -16,19 +16,19 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/c_allreduce_op.h"
@@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(3) << preStr << ":" << std::endl <<debugstring;
+  VLOG(3) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
@@ -80,7 +78,8 @@ void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   ctx.Wait();
 }

-void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
+void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx,
+                         int iter) {
   // init
   auto x = scope->Var("X");
   auto tensor_x = x->GetMutable<f::LoDTensor>();
@@ -109,15 +108,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter)
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx_"+ std::to_string(iter));
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
+  attrs["ring_id"] = 0;

-  auto op = f::OpRegistry::CreateOp("c_allreduce_sum",
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    attrs);
+  auto op = f::OpRegistry::CreateOp("c_allreduce_sum", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();
@@ -138,11 +135,12 @@ TEST(c_allreduce_sum, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  for(int i = 0; i < 1; i ++){
+  Prepare(&scope, *ctx);
+  for (int i = 0; i < 1; i++) {
     VLOG(2) << "iter num: " << i;
-    TestHCCLAllReduceOp(&scope, ctx, i);
+    TestHCCLAllReduceOp(&scope, *ctx, i);
   }
 }
@@ -16,19 +16,19 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/c_broadcast_op.h"
@@ -47,25 +47,23 @@ USE_OP_DEVICE_KERNEL(c_broadcast, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
   comm_init_attrs["ring_id"] = 0;
@@ -87,7 +85,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
   int num = 2;
   std::vector<float> init;
   int rank_id = atoi(getenv("RANK_ID"));

   for (int64_t i = 0; i < num * num; ++i) {
     init.push_back(1.0 + rank_id);
   }
@@ -106,18 +104,18 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["root"]=0;
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx");
+  attrs["root"] = 0;
+  attrs["ring_id"] = 0;

   auto op = f::OpRegistry::CreateOp("c_broadcast", {{"X", {"X"}}},
                                     {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();

   std::vector<float> out_vec;
   TensorToVector(*tensor_out, ctx, &out_vec);
   ctx.Wait();
@@ -132,9 +130,10 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(c_broadcast, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLBroadcastOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLBroadcastOp(&scope, *ctx);
 }
@@ -16,19 +16,19 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/c_reduce_op.h"
@@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(3) << preStr << ":" << std::endl <<debugstring;
+  VLOG(3) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
@@ -109,15 +107,13 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx_"+ std::to_string(iter));
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
+  attrs["ring_id"] = 0;
   int root_id = 0;
-  attrs["root_id"]=root_id;
+  attrs["root_id"] = root_id;

-  auto op = f::OpRegistry::CreateOp("c_reduce_sum",
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    attrs);
+  auto op = f::OpRegistry::CreateOp("c_reduce_sum", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -130,10 +126,9 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
   EXPECT_EQ(out_vec.size(), init.size());
   for (uint32_t i = 0; i < out_vec.size(); i++) {
-    if(rank_id == root_id){
+    if (rank_id == root_id) {
       EXPECT_EQ(out_vec[i], 3.0);
-    }
-    else{
+    } else {
       EXPECT_EQ(out_vec[i], init[i]);
     }
   }
@@ -143,11 +138,12 @@ TEST(c_reduce_sum, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  for(int i = 0; i < 2; i ++){
+  Prepare(&scope, *ctx);
+  for (int i = 0; i < 2; i++) {
     VLOG(2) << "iter num: " << i;
-    TestHCCLReduceOp(&scope, ctx, i);
+    TestHCCLReduceOp(&scope, *ctx, i);
   }
 }
@@ -16,23 +16,23 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
+#include "paddle/fluid/operators/collective/c_allreduce_op.h"
+#include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"

 #if defined(PADDLE_WITH_ASCEND_CL)
@@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_reducescatter, NPU);
 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
@@ -112,15 +110,15 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["ring_id"]=0;
-  attrs["nranks"]=2;
+  attrs["tag"] = std::string("tagx");
+  attrs["ring_id"] = 0;
+  attrs["nranks"] = 2;

   auto op = f::OpRegistry::CreateOp("c_reducescatter", {{"X", {"X"}}},
                                     {{"Out", {"Out"}}}, attrs);

   int iter_num = 10;
-  for (int i = 0; i < iter_num; i ++) {
+  for (int i = 0; i < iter_num; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();
@@ -140,8 +138,9 @@ TEST(c_reducescatter, NPU) {
   f::Scope scope;
   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLReduceScatterOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLReduceScatterOp(&scope, *ctx);
 }
@@ -102,6 +102,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(c_sync_calc_stream, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }
@@ -124,8 +124,8 @@ TEST(c_broadcast, NPU) {
   f::Scope scope;
   char* npu_id = getenv("FLAGS_selected_npus");
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));

-  Prepare(&scope, ctx);
-  TestHCCLBroadcastOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLBroadcastOp(&scope, *ctx);
 }
@@ -16,19 +16,19 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/recv_v2_op.h"
@@ -45,80 +45,80 @@ USE_OP(recv_v2);
 USE_NO_KERNEL_OP(c_comm_init_hcom);
 USE_OP_DEVICE_KERNEL(recv_v2, NPU);

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
-
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   std::string rank_table_file = getenv("RANK_TABLE_FILE");
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));
   int src_rank = atoi(getenv("SRC_RANK"));
   int dest_rank = atoi(getenv("DEST_RANK"));
-  VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" <<dest_rank;
+  VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank"
+          << dest_rank;

-  std::vector<int> rank_ids = {0,1};
+  std::vector<int> rank_ids = {0, 1};
   f::AttributeMap comm_init_attrs;
   comm_init_attrs["ring_id"] = 0;
   comm_init_attrs["nranks"] = 2;
   comm_init_attrs["rank"] = rank_id;
   comm_init_attrs["device_id"] = device_id;
   comm_init_attrs["rank_ids"] = rank_ids;
-  auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
+  auto comm_init_op =
+      f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
   VLOG(3) << "CreateOp c_comm_init_hcom";
   auto place = ctx.GetPlace();
   comm_init_op->Run(*scope, place);
   ctx.Wait();
 }

-void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx){
+void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx) {
   std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;

   int num = atoi(getenv("DATA_SIZE"));
   EXPECT_GT(num, 0);
   EXPECT_LT(num, 1 << 15);
   int rank_id = atoi(getenv("RANK_ID"));
-  VLOG(3) << "rank_id:" << rank_id<<std::endl;
+  VLOG(3) << "rank_id:" << rank_id << std::endl;

   ctx.Wait();
   auto place = ctx.GetPlace();
   auto out = scope->Var("Out");
   auto tensor_out = out->GetMutable<f::LoDTensor>();
   tensor_out->Resize({num, num});
   tensor_out->mutable_data<float>(place);  // allocate

   ctx.Wait();

   f::AttributeMap attrs;
-  attrs["tag"]=std::string("srtest");
-  attrs["peer"]=atoi(getenv("SRC_RANK"));
-  attrs["ring_id"]=0;
-  attrs["srTag"]=0;
+  attrs["tag"] = std::string("srtest");
+  attrs["peer"] = atoi(getenv("SRC_RANK"));
+  attrs["ring_id"] = 0;
+  attrs["srTag"] = 0;
   std::vector<int> out_shape;
   out_shape.push_back(num);
   out_shape.push_back(num);
-  attrs["out_shape"]=out_shape;
+  attrs["out_shape"] = out_shape;

   auto op = f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs);
   VLOG(3) << "CreateOp recv_v2";

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   VLOG(3) << "Run op recv_v2";
   std::vector<float> out_vec;
   TensorToVector(*tensor_out, ctx, &out_vec);
   ctx.Wait();
-  std::vector<float> init(num*num, 1.0 * atoi(getenv("DEST_RANK")));
+  std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
   EXPECT_EQ(out_vec == init, true);
 }

-TEST(recv_v2, NPU){
+TEST(recv_v2, NPU) {
   f::Scope scope;
-  char * npu_id=getenv("FLAGS_selected_npus");
+  char* npu_id = getenv("FLAGS_selected_npus");
   VLOG(3) << "Select npu:" << npu_id;
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));
   VLOG(3) << "Place over";
-  Prepare(&scope, ctx);
+  Prepare(&scope, *ctx);
   VLOG(3) << "Prepare over";
-  TestHcomRecvOp(&scope, ctx);
+  TestHcomRecvOp(&scope, *ctx);
   VLOG(3) << "Test over";
 }
@@ -16,18 +16,18 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"
-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/send_v2_op.h"
@@ -44,68 +44,69 @@ USE_OP(send_v2);
 USE_NO_KERNEL_OP(c_comm_init_hcom);
 USE_OP_DEVICE_KERNEL(send_v2, NPU);

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
-
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   std::string rank_table_file = getenv("RANK_TABLE_FILE");
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));
   int src_rank = atoi(getenv("SRC_RANK"));
   int dest_rank = atoi(getenv("DEST_RANK"));
-  VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" <<dest_rank;
+  VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank"
+          << dest_rank;

   std::vector<int> rank_ids = {0, 1};
   f::AttributeMap comm_init_attrs;
   comm_init_attrs["ring_id"] = 0;
   comm_init_attrs["nranks"] = 2;
   comm_init_attrs["rank"] = rank_id;
   comm_init_attrs["device_id"] = device_id;
   comm_init_attrs["rank_ids"] = rank_ids;
-  auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
+  auto comm_init_op =
+      f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
   auto place = ctx.GetPlace();
   comm_init_op->Run(*scope, place);
   ctx.Wait();
 }

-void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx){
-  std::cout<< "BEGIN TEST:"<< __FUNCTION__ <<std::endl;
+void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx) {
+  std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
   auto x = scope->Var("X");
   auto tensor_x = x->GetMutable<f::LoDTensor>();
-  int num = atoi(getenv("DATA_SIZE"));;
+  int num = atoi(getenv("DATA_SIZE"));
+
   EXPECT_GT(num, 0);
   EXPECT_LT(num, 1 << 15);
-  std::vector<float> init(num*num, 1.0 * atoi(getenv("DEST_RANK")));
+  std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
   int rank_id = atoi(getenv("RANK_ID"));
-  VLOG(3)<<"rank id:"<<rank_id;
+  VLOG(3) << "rank id:" << rank_id;
   TensorFromVector(init, ctx, tensor_x);
   tensor_x->Resize({num, num});
   ctx.Wait();
   auto place = ctx.GetPlace();
   ctx.Wait();

   f::AttributeMap attrs;
-  attrs["tag"]=std::string("srtest");
-  attrs["peer"]=atoi(getenv("DEST_RANK"));
-  attrs["ring_id"]=0;
-  attrs["srTag"]=0;
+  attrs["tag"] = std::string("srtest");
+  attrs["peer"] = atoi(getenv("DEST_RANK"));
+  attrs["ring_id"] = 0;
+  attrs["srTag"] = 0;

   auto op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
-  VLOG(3)<<"send run over";
+  VLOG(3) << "send run over";
   ctx.Wait();
 }

-TEST(send_v2, NPU){
+TEST(send_v2, NPU) {
   f::Scope scope;
-  char * npu_id=getenv("FLAGS_selected_npus");
+  char* npu_id = getenv("FLAGS_selected_npus");
   VLOG(3) << "Select npu:" << npu_id;
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));
   VLOG(3) << "Place over";
-  Prepare(&scope, ctx);
+  Prepare(&scope, *ctx);
   VLOG(3) << "Prepare over";
-  TestHcomSendOp(&scope, ctx);
+  TestHcomSendOp(&scope, *ctx);
   VLOG(3) << "Test over";
 }
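The send_v2/recv_v2 tests are meant to run as a pair of processes driven entirely by environment variables: the sender fills a `num x num` tensor with the value of `DEST_RANK`, and the receiver asserts it got exactly that. How the two endpoints line up, condensed from the two tests above (fragment, not runnable on its own):

```cpp
// Condensed pairing of the two endpoints; num comes from DATA_SIZE.
int num = atoi(getenv("DATA_SIZE"));   // tensor is num x num on both sides

f::AttributeMap attrs;
attrs["tag"] = std::string("srtest");  // must match on both ends
attrs["ring_id"] = 0;
attrs["srTag"] = 0;

// Sender process: peer is the destination rank.
attrs["peer"] = atoi(getenv("DEST_RANK"));
auto send_op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs);

// Receiver process: peer is the source rank, and out_shape must match
// the {num, num} tensor being sent.
attrs["peer"] = atoi(getenv("SRC_RANK"));
attrs["out_shape"] = std::vector<int>{num, num};
auto recv_op =
    f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs);
```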
@@ -38,7 +38,7 @@ USE_OP(elementwise_sub);
 USE_OP_DEVICE_KERNEL(elementwise_sub, NPU);

 template <typename T>
-void Compare(f::Scope* scope, const p::DeviceContext& ctx,
+void Compare(f::Scope *scope, const p::DeviceContext &ctx,
              std::string op_type) {
   // init
   auto x = scope->Var("X");
@@ -90,7 +90,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
 }

 template <typename T>
-void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
+void CompareGrad(f::Scope *scope, const p::DeviceContext &ctx,
                  std::string op_type) {
   // init
   auto dout = scope->Var("DOut");
@@ -154,30 +154,30 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
 TEST(elementwise_add, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "elementwise_add");
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "elementwise_add");
 }

 TEST(elementwise_sub, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "elementwise_sub");
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "elementwise_sub");
 }

 TEST(elementwise_sub, NPU_fp16) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<p::float16>(&scope, ctx, "elementwise_sub");
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<p::float16>(&scope, *ctx, "elementwise_sub");
 }

 TEST(elementwise_sub_grad, NPU) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx, "elementwise_sub_grad");
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx, "elementwise_sub_grad");
 }

 TEST(elementwise_add_grad, NPU) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx, "elementwise_add_grad");
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx, "elementwise_add_grad");
 }
@@ -69,6 +69,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(expand, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }
@@ -152,18 +152,18 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
 TEST(gather, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "gather");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "gather");
 }

 TEST(gather, NPU_fp16) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<p::float16>(&scope, ctx, "gather");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<p::float16>(&scope, *ctx, "gather");
 }

 TEST(gather_grad, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx, "gather_grad");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx, "gather_grad");
 }
@@ -59,8 +59,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   auto place = ctx.GetPlace();
-  auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}}, attrs);
+  auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}}, {{"Out", {"Out"}}},
+                                    attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -76,8 +76,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   ctx.Wait();

   gettimeofday(&end, NULL);
-  int micros = (((end.tv_sec - start.tv_sec) * 1000000) +
-               end.tv_usec) - (start.tv_usec);
+  int micros =
+      (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec);
   printf("used time: %d\n", micros / 100);

   // eval value
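The reflowed `micros` expression is plain wall-clock arithmetic: the seconds difference scaled to microseconds, plus the end sample's microsecond field, minus the start sample's. The `printf` divides by 100, presumably averaging over the iteration count of the timed loop, which this excerpt does not show. A standalone equivalent for reference:

```cpp
#include <sys/time.h>

// Elapsed wall-clock microseconds between two gettimeofday() samples,
// computed exactly as in the test above.
static int ElapsedMicros(const timeval& start, const timeval& end) {
  return (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) -
         (start.tv_usec);
}
```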
@@ -124,8 +124,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
   auto place = ctx.GetPlace();
   auto op = f::OpRegistry::CreateOp("gelu_grad",
                                     {{"Out@GRAD", {"DOut"}}, {"X", {"X"}}},
                                     {{"X@GRAD", {"DX"}}}, attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -141,8 +141,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
   ctx.Wait();

   gettimeofday(&end, NULL);
-  int micros = (((end.tv_sec - start.tv_sec) * 1000000) +
-               end.tv_usec) - (start.tv_usec);
+  int micros =
+      (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec);
   printf("used time: %d\n", micros / 100);

   // eval value
@@ -156,14 +156,13 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
 }

 TEST(gelu, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }

 TEST(gelu_grad, NPU) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx);
 }
@@ -54,10 +54,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   auto out = scope->Var("Out");
   auto tensor_out = out->GetMutable<f::LoDTensor>();

-  f::AttributeMap attr_input = { {"step", static_cast<float>(2.0)} };
+  f::AttributeMap attr_input = {{"step", static_cast<float>(2.0)}};
   auto op = f::OpRegistry::CreateOp("increment", {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    attr_input);
+                                    {{"Out", {"Out"}}}, attr_input);

   op->Run(*scope, place);
@@ -70,16 +69,14 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   EXPECT_EQ(out_vec[0], static_cast<T>(3.0));
 }

 TEST(increment, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "increment");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "increment");
 }

 TEST(increment, NPU_fp64) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "increment");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<double>(&scope, *ctx, "increment");
 }
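Note that the last hunk is a behavioral fix, not just formatting: `TEST(increment, NPU_fp64)` previously instantiated `Compare<float>`, so it merely repeated the fp32 test; it now instantiates `Compare<double>` as its name promises.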
@@ -67,10 +67,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   auto tensor_out = out->GetMutable<f::LoDTensor>();

   // run
-  auto op = f::OpRegistry::CreateOp(op_type, {{"Start", {"Start"}},
-                                    {"End", {"End"}},
-                                    {"Step", {"Step"}}},
-                                    {{"Out", {"Out"}}}, {});
+  auto op = f::OpRegistry::CreateOp(
+      op_type, {{"Start", {"Start"}}, {"End", {"End"}}, {"Step", {"Step"}}},
+      {{"Out", {"Out"}}}, {});

   op->Run(*scope, place);
@@ -86,10 +85,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   EXPECT_EQ(static_cast<T>(out_vec[4]), static_cast<T>(9.0));
 }

 TEST(range, NPU) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<int>(&scope, ctx, "range");
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<int>(&scope, *ctx, "range");
 }
@@ -78,6 +78,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(reduce_any, NPU) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<bool>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<bool>(&scope, *ctx);
 }
@@ -21,11 +21,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/tensor_util.h"

 namespace f = paddle::framework;
 namespace p = paddle::platform;
@@ -59,15 +58,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   // run
   int axis = 1;
   f::AttributeMap attrs = {
-      {"axis", axis},
-      {"use_cudnn", false},
-      {"use_mkldnn", false},
-      {"mkldnn_data_type", std::string("float32")},
-      {"is_test", false}, };
-  auto op =
-      f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}},
-                              {{"Out", {"Out"}}}, attrs);
+      {"axis", axis}, {"use_cudnn", false},
+      {"use_mkldnn", false}, {"mkldnn_data_type", std::string("float32")},
+      {"is_test", false},
+  };
+  auto op = f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -76,7 +73,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   TensorToVector(*tensor_out, ctx, &out_vec);

   for (int i = 0; i < static_cast<int>(out_vec.size()); ++i) {
-    VLOG(3) << "out_vec[" << i << "] : "<< out_vec[i];
+    VLOG(3) << "out_vec[" << i << "] : " << out_vec[i];
   }

   ctx.Wait();
@@ -84,7 +81,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   EXPECT_EQ((uint32_t)out_vec.size(), (uint32_t)(6));
 }

-
 template <typename T>
 void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
   // init
@@ -128,16 +124,15 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
   attrs = {
       {"name", std::string("softmax_grad")},
       {"axis", static_cast<int>(0)},
       {"use_cudnn", false},
       {"use_mkldnn", false},
       {"mkldnn_data_type", std::string("float32")},
       {"is_test", false},
-      {"data_format", std::string("AnyLayout")}, };
-  auto op =
-      f::OpRegistry::CreateOp("softmax_grad",
-                              {{"Out", {"Out"}},
-                               {"Out@GRAD", {"DOut"}}},
-                              {{"X@GRAD", {"DX"}}}, attrs);
+      {"data_format", std::string("AnyLayout")},
+  };
+  auto op = f::OpRegistry::CreateOp("softmax_grad",
+                                    {{"Out", {"Out"}}, {"Out@GRAD", {"DOut"}}},
+                                    {{"X@GRAD", {"DX"}}}, attrs);

   auto place = ctx.GetPlace();
   op->Run(*scope, place);
@@ -164,12 +159,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(softmax, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }

 TEST(softmax_grad, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx);
 }
@@ -64,9 +64,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   axis.push_back(2);

   f::AttributeMap attrs = {{"axes", axis}};
-  auto op =
-      f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}},
-                              {{"Out", {"Out"}}}, attrs);
+  auto op = f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -74,7 +73,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   EXPECT_EQ((uint32_t)tensor_out->dims().size(), uint32_t(2));
   EXPECT_EQ((uint32_t)tensor_out->dims()[0], uint32_t(dim0));
   EXPECT_EQ((uint32_t)tensor_out->dims()[1], uint32_t(dim1));

   std::vector<T> out_vec;
   TensorToVector(*tensor_out, ctx, &out_vec);
   for (uint32_t i = 0; i < out_vec.size(); i++) {
@@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(squeeze, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }
@@ -126,12 +126,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(transpose2, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }

 TEST(transpose2_grad, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  CompareGrad<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx);
 }
@@ -63,9 +63,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   axis.push_back(1);

   f::AttributeMap attrs = {{"axes", axis}};
-  auto op =
-      f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}},
-                              {{"Out", {"Out"}}}, attrs);
+  auto op = f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

   op->Run(*scope, place);
   ctx.Wait();
@@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
 TEST(unsqueeze, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }
@@ -254,7 +254,7 @@ NPUDeviceContext::~NPUDeviceContext() {
 void NPUDeviceContext::Wait() const {
   platform::RecordEvent record_event("NPUDeviceContext/wait");
-  VLOG(4) << "NPU context Wait";
+  VLOG(4) << "NPU context(" << this << ") Wait";
   stream_->Wait();
 }
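This last hunk is the "refine log" part of the commit message: the wait log now prints the context's address, so a trace that interleaves several device contexts (for example, the pooled one next to a stray locally constructed one) can tell them apart:

```cpp
// Before: every context logged an identical, indistinguishable line.
//   VLOG(4) << "NPU context Wait";
// After: include the object address to disambiguate.
VLOG(4) << "NPU context(" << this << ") Wait";
```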