Unverified commit 5ad94e7b authored by Leo Chen, committed by GitHub

fix NPUDeviceContext in all c++ unittest (#32198)

* fix NPUDeviceContext in all c++ unittest

* refine log
Co-authored-by: pangyoki <pangyoki@126.com>
Parent 054f8e7a
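The recurring change in this diff is that each unit test stops constructing its own p::NPUDeviceContext on the stack and instead fetches the shared context from the global DeviceContextPool, then passes it by dereference to the test helper. Below is a minimal sketch of the before/after pattern; the test name is a placeholder, and the namespace aliases and the Compare<> helper are the ones defined in each test file in this commit.

// Sketch of the pattern applied in every test below (assumes the namespace
// aliases and the per-file Compare<> helper used by these tests).
namespace f = paddle::framework;
namespace p = paddle::platform;

TEST(some_op, NPU_fp32) {  // hypothetical test name for illustration
  f::Scope scope;

  // Before: each test built a fresh NPUDeviceContext (and stream) of its own.
  //   p::NPUDeviceContext ctx(p::NPUPlace(0));
  //   Compare<float>(&scope, ctx);

  // After: reuse the process-wide context owned by the DeviceContextPool,
  // so the test runs on the same device context the framework itself uses.
  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
  Compare<float>(&scope, *ctx);
}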
......@@ -120,12 +120,12 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) {
TEST(check_finite_and_unscale, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
TEST(check_finite_and_unscale, NPU_fp16) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<p::float16>(&scope, ctx);
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<p::float16>(&scope, *ctx);
}
......@@ -56,10 +56,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
auto out = scope->Var("Out");
auto tensor_out = out->GetMutable<f::LoDTensor>();
auto op = f::OpRegistry::CreateOp(op_type,
{{"X", {"X"}}},
{{"Out", {"Out"}}},
{});
auto op =
f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}}, {{"Out", {"Out"}}}, {});
op->Run(*scope, place);
......@@ -75,11 +73,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
EXPECT_EQ(out_vec[3], static_cast<T>(4.0));
}
TEST(assign, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "assign");
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx, "assign");
}
......@@ -16,23 +16,23 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#if defined(PADDLE_WITH_ASCEND_CL)
......@@ -50,25 +50,23 @@ USE_OP_DEVICE_KERNEL(c_allgather, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(2) << preStr << ":" << std::endl <<debugstring;
VLOG(2) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
......@@ -90,7 +88,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
std::vector<float> init;
int rank_id = atoi(getenv("RANK_ID"));
int num1 = 1;
int num2 = 4;
......@@ -112,18 +110,18 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx");
attrs["ring_id"]=0;
attrs["nranks"]=2;
attrs["tag"] = std::string("tagx");
attrs["ring_id"] = 0;
attrs["nranks"] = 2;
auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
{{"Out", {"Out"}}}, attrs);
for (int i = 0; i < 10; i ++) {
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
ctx.Wait();
std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait();
......@@ -139,13 +137,13 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
}
}
TEST(c_allgather, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
// only support one device, if more than one device, use first default
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
TestHCCLAllGatherOp(&scope, ctx);
Prepare(&scope, *ctx);
TestHCCLAllGatherOp(&scope, *ctx);
}
......@@ -16,23 +16,23 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#if defined(PADDLE_WITH_ASCEND_CL)
......@@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(2) << preStr << ":" << std::endl <<debugstring;
VLOG(2) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
......@@ -112,13 +110,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx");
attrs["ring_id"]=0;
attrs["tag"] = std::string("tagx");
attrs["ring_id"] = 0;
auto op = f::OpRegistry::CreateOp("c_allreduce_max", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
{{"Out", {"Out"}}}, attrs);
for (int i = 0; i < 10; i ++) {
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
ctx.Wait();
......@@ -139,8 +137,9 @@ TEST(c_allreduce_max, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
TestHCCLAllReduceOp(&scope, ctx);
Prepare(&scope, *ctx);
TestHCCLAllReduceOp(&scope, *ctx);
}
......@@ -16,19 +16,19 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
......@@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(3) << preStr << ":" << std::endl <<debugstring;
VLOG(3) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
......@@ -80,7 +78,8 @@ void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
ctx.Wait();
}
void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx,
int iter) {
// init
auto x = scope->Var("X");
auto tensor_x = x->GetMutable<f::LoDTensor>();
......@@ -109,15 +108,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter)
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx_"+ std::to_string(iter));
attrs["ring_id"]=0;
attrs["tag"] = std::string("tagx_" + std::to_string(iter));
attrs["ring_id"] = 0;
auto op = f::OpRegistry::CreateOp("c_allreduce_sum",
{{"X", {"X"}}},
{{"Out", {"Out"}}},
attrs);
auto op = f::OpRegistry::CreateOp("c_allreduce_sum", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
for (int i = 0; i < 10; i ++) {
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
ctx.Wait();
......@@ -138,11 +135,12 @@ TEST(c_allreduce_sum, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
for(int i = 0; i < 1; i ++){
Prepare(&scope, *ctx);
for (int i = 0; i < 1; i++) {
VLOG(2) << "iter num: " << i;
TestHCCLAllReduceOp(&scope, ctx, i);
TestHCCLAllReduceOp(&scope, *ctx, i);
}
}
......@@ -16,19 +16,19 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
......@@ -47,25 +47,23 @@ USE_OP_DEVICE_KERNEL(c_broadcast, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(2) << preStr << ":" << std::endl <<debugstring;
VLOG(2) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
......@@ -87,7 +85,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
int num = 2;
std::vector<float> init;
int rank_id = atoi(getenv("RANK_ID"));
for (int64_t i = 0; i < num * num; ++i) {
init.push_back(1.0 + rank_id);
}
......@@ -106,18 +104,18 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx");
attrs["root"]=0;
attrs["ring_id"]=0;
attrs["tag"] = std::string("tagx");
attrs["root"] = 0;
attrs["ring_id"] = 0;
auto op = f::OpRegistry::CreateOp("c_broadcast", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
{{"Out", {"Out"}}}, attrs);
for (int i = 0; i < 10; i ++) {
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
ctx.Wait();
std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait();
......@@ -132,9 +130,10 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(c_broadcast, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
// only support one device, if more than one device, use first default
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
TestHCCLBroadcastOp(&scope, ctx);
Prepare(&scope, *ctx);
TestHCCLBroadcastOp(&scope, *ctx);
}
......@@ -16,19 +16,19 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_reduce_op.h"
......@@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(3) << preStr << ":" << std::endl <<debugstring;
VLOG(3) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
......@@ -109,15 +107,13 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx_"+ std::to_string(iter));
attrs["ring_id"]=0;
attrs["tag"] = std::string("tagx_" + std::to_string(iter));
attrs["ring_id"] = 0;
int root_id = 0;
attrs["root_id"]=root_id;
attrs["root_id"] = root_id;
auto op = f::OpRegistry::CreateOp("c_reduce_sum",
{{"X", {"X"}}},
{{"Out", {"Out"}}},
attrs);
auto op = f::OpRegistry::CreateOp("c_reduce_sum", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -130,10 +126,9 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
EXPECT_EQ(out_vec.size(), init.size());
for (uint32_t i = 0; i < out_vec.size(); i++) {
if(rank_id == root_id){
if (rank_id == root_id) {
EXPECT_EQ(out_vec[i], 3.0);
}
else{
} else {
EXPECT_EQ(out_vec[i], init[i]);
}
}
......@@ -143,11 +138,12 @@ TEST(c_reduce_sum, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
for(int i = 0; i < 2; i ++){
Prepare(&scope, *ctx);
for (int i = 0; i < 2; i++) {
VLOG(2) << "iter num: " << i;
TestHCCLReduceOp(&scope, ctx, i);
TestHCCLReduceOp(&scope, *ctx, i);
}
}
......@@ -16,23 +16,23 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#if defined(PADDLE_WITH_ASCEND_CL)
......@@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_reducescatter, NPU);
DECLARE_string(selected_npus);
template<typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
std::string debugstring = "";
for (auto ele : data) {
debugstring += std::to_string(ele) + std::string(",");
}
VLOG(2) << preStr << ":" << std::endl <<debugstring;
VLOG(2) << preStr << ":" << std::endl << debugstring;
}
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id
<< "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
<< "; rank_id = " << rank_id
<< "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));
std::vector<int> rank_ids{0, 1};
f::AttributeMap comm_init_attrs;
......@@ -112,15 +110,15 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
// run
f::AttributeMap attrs;
attrs["tag"]=std::string("tagx");
attrs["ring_id"]=0;
attrs["nranks"]=2;
attrs["tag"] = std::string("tagx");
attrs["ring_id"] = 0;
attrs["nranks"] = 2;
auto op = f::OpRegistry::CreateOp("c_reducescatter", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
{{"Out", {"Out"}}}, attrs);
int iter_num = 10;
for (int i = 0; i < iter_num; i ++) {
for (int i = 0; i < iter_num; i++) {
op->Run(*scope, place);
}
ctx.Wait();
......@@ -140,8 +138,9 @@ TEST(c_reducescatter, NPU) {
f::Scope scope;
// only support one device, if more than one device, use first default
p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
auto* ctx = p::DeviceContextPool::Instance().Get(
p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
Prepare(&scope, ctx);
TestHCCLReduceScatterOp(&scope, ctx);
Prepare(&scope, *ctx);
TestHCCLReduceScatterOp(&scope, *ctx);
}
......@@ -102,6 +102,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(c_sync_calc_stream, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
......@@ -124,8 +124,8 @@ TEST(c_broadcast, NPU) {
f::Scope scope;
char* npu_id = getenv("FLAGS_selected_npus");
p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));
Prepare(&scope, ctx);
TestHCCLBroadcastOp(&scope, ctx);
Prepare(&scope, *ctx);
TestHCCLBroadcastOp(&scope, *ctx);
}
......@@ -16,19 +16,19 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/recv_v2_op.h"
......@@ -45,80 +45,80 @@ USE_OP(recv_v2);
USE_NO_KERNEL_OP(c_comm_init_hcom);
USE_OP_DEVICE_KERNEL(recv_v2, NPU);
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
std::string rank_table_file = getenv("RANK_TABLE_FILE");
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
int src_rank = atoi(getenv("SRC_RANK"));
int dest_rank = atoi(getenv("DEST_RANK"));
VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" <<dest_rank;
std::vector<int> rank_ids = {0,1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
comm_init_attrs["nranks"] = 2;
comm_init_attrs["rank"] = rank_id;
comm_init_attrs["device_id"] = device_id;
comm_init_attrs["rank_ids"] = rank_ids;
auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
VLOG(3) << "CreateOp c_comm_init_hcom";
auto place = ctx.GetPlace();
comm_init_op->Run(*scope, place);
ctx.Wait();
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
std::string rank_table_file = getenv("RANK_TABLE_FILE");
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
int src_rank = atoi(getenv("SRC_RANK"));
int dest_rank = atoi(getenv("DEST_RANK"));
VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank"
<< dest_rank;
std::vector<int> rank_ids = {0, 1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
comm_init_attrs["nranks"] = 2;
comm_init_attrs["rank"] = rank_id;
comm_init_attrs["device_id"] = device_id;
comm_init_attrs["rank_ids"] = rank_ids;
auto comm_init_op =
f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
VLOG(3) << "CreateOp c_comm_init_hcom";
auto place = ctx.GetPlace();
comm_init_op->Run(*scope, place);
ctx.Wait();
}
void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx){
std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
int num = atoi(getenv("DATA_SIZE"));
EXPECT_GT(num, 0);
EXPECT_LT(num, 1 << 15);
int rank_id = atoi(getenv("RANK_ID"));
VLOG(3) << "rank_id:" << rank_id<<std::endl;
ctx.Wait();
auto place = ctx.GetPlace();
auto out = scope->Var("Out");
auto tensor_out = out->GetMutable<f::LoDTensor>();
tensor_out->Resize({num, num});
tensor_out->mutable_data<float>(place); // allocate
ctx.Wait();
f::AttributeMap attrs;
attrs["tag"]=std::string("srtest");
attrs["peer"]=atoi(getenv("SRC_RANK"));
attrs["ring_id"]=0;
attrs["srTag"]=0;
std::vector<int> out_shape;
out_shape.push_back(num);
out_shape.push_back(num);
attrs["out_shape"]=out_shape;
auto op = f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs);
VLOG(3) << "CreateOp recv_v2";
for (int i = 0; i < 10; i ++) {
op->Run(*scope, place);
}
VLOG(3) << "Run op recv_v2";
std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait();
std::vector<float> init(num*num, 1.0 * atoi(getenv("DEST_RANK")));
EXPECT_EQ(out_vec == init, true);
void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx) {
std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
int num = atoi(getenv("DATA_SIZE"));
EXPECT_GT(num, 0);
EXPECT_LT(num, 1 << 15);
int rank_id = atoi(getenv("RANK_ID"));
VLOG(3) << "rank_id:" << rank_id << std::endl;
ctx.Wait();
auto place = ctx.GetPlace();
auto out = scope->Var("Out");
auto tensor_out = out->GetMutable<f::LoDTensor>();
tensor_out->Resize({num, num});
tensor_out->mutable_data<float>(place); // allocate
ctx.Wait();
f::AttributeMap attrs;
attrs["tag"] = std::string("srtest");
attrs["peer"] = atoi(getenv("SRC_RANK"));
attrs["ring_id"] = 0;
attrs["srTag"] = 0;
std::vector<int> out_shape;
out_shape.push_back(num);
out_shape.push_back(num);
attrs["out_shape"] = out_shape;
auto op = f::OpRegistry::CreateOp("recv_v2", {}, {{"Out", {"Out"}}}, attrs);
VLOG(3) << "CreateOp recv_v2";
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
VLOG(3) << "Run op recv_v2";
std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait();
std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
EXPECT_EQ(out_vec == init, true);
}
TEST(recv_v2, NPU){
f::Scope scope;
char * npu_id=getenv("FLAGS_selected_npus");
VLOG(3) << "Select npu:" << npu_id;
p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
VLOG(3) << "Place over";
Prepare(&scope, ctx);
VLOG(3) << "Prepare over";
TestHcomRecvOp(&scope, ctx);
VLOG(3) << "Test over";
TEST(recv_v2, NPU) {
f::Scope scope;
char* npu_id = getenv("FLAGS_selected_npus");
VLOG(3) << "Select npu:" << npu_id;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));
VLOG(3) << "Place over";
Prepare(&scope, *ctx);
VLOG(3) << "Prepare over";
TestHcomRecvOp(&scope, *ctx);
VLOG(3) << "Test over";
}
......@@ -16,18 +16,18 @@ limitations under the License. */
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <stdio.h>
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/send_v2_op.h"
......@@ -44,68 +44,69 @@ USE_OP(send_v2);
USE_NO_KERNEL_OP(c_comm_init_hcom);
USE_OP_DEVICE_KERNEL(send_v2, NPU);
void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
std::string rank_table_file = getenv("RANK_TABLE_FILE");
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
int src_rank = atoi(getenv("SRC_RANK"));
int dest_rank = atoi(getenv("DEST_RANK"));
VLOG(3)<<"rank_id "<< rank_id << "src_rank"<< src_rank <<"dest_rank" <<dest_rank;
std::vector<int> rank_ids = {0, 1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
comm_init_attrs["nranks"] = 2;
comm_init_attrs["rank"] = rank_id;
comm_init_attrs["device_id"] = device_id;
comm_init_attrs["rank_ids"] = rank_ids;
auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
auto place = ctx.GetPlace();
comm_init_op->Run(*scope, place);
ctx.Wait();
void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
std::string rank_table_file = getenv("RANK_TABLE_FILE");
int rank_id = atoi(getenv("RANK_ID"));
int device_id = atoi(getenv("DEVICE_ID"));
int src_rank = atoi(getenv("SRC_RANK"));
int dest_rank = atoi(getenv("DEST_RANK"));
VLOG(3) << "rank_id " << rank_id << "src_rank" << src_rank << "dest_rank"
<< dest_rank;
std::vector<int> rank_ids = {0, 1};
f::AttributeMap comm_init_attrs;
comm_init_attrs["ring_id"] = 0;
comm_init_attrs["nranks"] = 2;
comm_init_attrs["rank"] = rank_id;
comm_init_attrs["device_id"] = device_id;
comm_init_attrs["rank_ids"] = rank_ids;
auto comm_init_op =
f::OpRegistry::CreateOp("c_comm_init_hcom", {}, {}, comm_init_attrs);
auto place = ctx.GetPlace();
comm_init_op->Run(*scope, place);
ctx.Wait();
}
void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx){
std::cout<< "BEGIN TEST:"<< __FUNCTION__ <<std::endl;
auto x = scope->Var("X");
auto tensor_x = x->GetMutable<f::LoDTensor>();
int num = atoi(getenv("DATA_SIZE"));;
EXPECT_GT(num, 0);
EXPECT_LT(num, 1 << 15);
std::vector<float> init(num*num, 1.0 * atoi(getenv("DEST_RANK")));
int rank_id = atoi(getenv("RANK_ID"));
VLOG(3)<<"rank id:"<<rank_id;
TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num, num});
ctx.Wait();
auto place = ctx.GetPlace();
ctx.Wait();
f::AttributeMap attrs;
attrs["tag"]=std::string("srtest");
attrs["peer"]=atoi(getenv("DEST_RANK"));
attrs["ring_id"]=0;
attrs["srTag"]=0;
auto op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs);
for (int i = 0; i < 10; i ++) {
op->Run(*scope, place);
}
VLOG(3)<<"send run over";
ctx.Wait();
void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx) {
std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
auto x = scope->Var("X");
auto tensor_x = x->GetMutable<f::LoDTensor>();
int num = atoi(getenv("DATA_SIZE"));
EXPECT_GT(num, 0);
EXPECT_LT(num, 1 << 15);
std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
int rank_id = atoi(getenv("RANK_ID"));
VLOG(3) << "rank id:" << rank_id;
TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num, num});
ctx.Wait();
auto place = ctx.GetPlace();
ctx.Wait();
f::AttributeMap attrs;
attrs["tag"] = std::string("srtest");
attrs["peer"] = atoi(getenv("DEST_RANK"));
attrs["ring_id"] = 0;
attrs["srTag"] = 0;
auto op = f::OpRegistry::CreateOp("send_v2", {{"X", {"X"}}}, {}, attrs);
for (int i = 0; i < 10; i++) {
op->Run(*scope, place);
}
VLOG(3) << "send run over";
ctx.Wait();
}
TEST(send_v2, NPU){
f::Scope scope;
char * npu_id=getenv("FLAGS_selected_npus");
VLOG(3) << "Select npu:" << npu_id;
p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));
VLOG(3) << "Place over";
Prepare(&scope, ctx);
VLOG(3) << "Prepare over";
TestHcomSendOp(&scope, ctx);
VLOG(3) << "Test over";
TEST(send_v2, NPU) {
f::Scope scope;
char* npu_id = getenv("FLAGS_selected_npus");
VLOG(3) << "Select npu:" << npu_id;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(atoi(npu_id)));
VLOG(3) << "Place over";
Prepare(&scope, *ctx);
VLOG(3) << "Prepare over";
TestHcomSendOp(&scope, *ctx);
VLOG(3) << "Test over";
}
......@@ -38,7 +38,7 @@ USE_OP(elementwise_sub);
USE_OP_DEVICE_KERNEL(elementwise_sub, NPU);
template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx,
void Compare(f::Scope *scope, const p::DeviceContext &ctx,
std::string op_type) {
// init
auto x = scope->Var("X");
......@@ -90,7 +90,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
}
template <typename T>
void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
void CompareGrad(f::Scope *scope, const p::DeviceContext &ctx,
std::string op_type) {
// init
auto dout = scope->Var("DOut");
......@@ -154,30 +154,30 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
TEST(elementwise_add, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "elementwise_add");
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx, "elementwise_add");
}
TEST(elementwise_sub, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "elementwise_sub");
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx, "elementwise_sub");
}
TEST(elementwise_sub, NPU_fp16) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<p::float16>(&scope, ctx, "elementwise_sub");
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<p::float16>(&scope, *ctx, "elementwise_sub");
}
TEST(elementwise_sub_grad, NPU) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx, "elementwise_sub_grad");
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx, "elementwise_sub_grad");
}
TEST(elementwise_add_grad, NPU) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx, "elementwise_add_grad");
auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx, "elementwise_add_grad");
}
......@@ -69,6 +69,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(expand, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
......@@ -152,18 +152,18 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
TEST(gather, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "gather");
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx, "gather");
}
TEST(gather, NPU_fp16) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<p::float16>(&scope, ctx, "gather");
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<p::float16>(&scope, *ctx, "gather");
}
TEST(gather_grad, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx, "gather_grad");
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx, "gather_grad");
}
......@@ -59,8 +59,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
// run
auto place = ctx.GetPlace();
auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
auto op = f::OpRegistry::CreateOp("gelu", {{"X", {"X"}}}, {{"Out", {"Out"}}},
attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -76,8 +76,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
ctx.Wait();
gettimeofday(&end, NULL);
int micros = (((end.tv_sec - start.tv_sec) * 1000000) +
end.tv_usec) - (start.tv_usec);
int micros =
(((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec);
printf("used time: %d\n", micros / 100);
// eval value
......@@ -124,8 +124,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
auto place = ctx.GetPlace();
auto op = f::OpRegistry::CreateOp("gelu_grad",
{{"Out@GRAD", {"DOut"}}, {"X", {"X"}}},
{{"X@GRAD", {"DX"}}}, attrs);
{{"Out@GRAD", {"DOut"}}, {"X", {"X"}}},
{{"X@GRAD", {"DX"}}}, attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -141,8 +141,8 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
ctx.Wait();
gettimeofday(&end, NULL);
int micros = (((end.tv_sec - start.tv_sec) * 1000000) +
end.tv_usec) - (start.tv_usec);
int micros =
(((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec);
printf("used time: %d\n", micros / 100);
// eval value
......@@ -156,14 +156,13 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
}
TEST(gelu, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
TEST(gelu_grad, NPU) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx);
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx);
}
......@@ -54,10 +54,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
auto out = scope->Var("Out");
auto tensor_out = out->GetMutable<f::LoDTensor>();
f::AttributeMap attr_input = { {"step", static_cast<float>(2.0)} };
f::AttributeMap attr_input = {{"step", static_cast<float>(2.0)}};
auto op = f::OpRegistry::CreateOp("increment", {{"X", {"X"}}},
{{"Out", {"Out"}}},
attr_input);
{{"Out", {"Out"}}}, attr_input);
op->Run(*scope, place);
......@@ -70,16 +69,14 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
EXPECT_EQ(out_vec[0], static_cast<T>(3.0));
}
TEST(increment, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "increment");
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx, "increment");
}
TEST(increment, NPU_fp64) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx, "increment");
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<double>(&scope, *ctx, "increment");
}
......@@ -67,10 +67,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
auto tensor_out = out->GetMutable<f::LoDTensor>();
// run
auto op = f::OpRegistry::CreateOp(op_type, {{"Start", {"Start"}},
{"End", {"End"}},
{"Step", {"Step"}}},
{{"Out", {"Out"}}}, {});
auto op = f::OpRegistry::CreateOp(
op_type, {{"Start", {"Start"}}, {"End", {"End"}}, {"Step", {"Step"}}},
{{"Out", {"Out"}}}, {});
op->Run(*scope, place);
......@@ -86,10 +85,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
EXPECT_EQ(static_cast<T>(out_vec[4]), static_cast<T>(9.0));
}
TEST(range, NPU) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<int>(&scope, ctx, "range");
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<int>(&scope, *ctx, "range");
}
......@@ -78,6 +78,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(reduce_any, NPU) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<bool>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<bool>(&scope, *ctx);
}
......@@ -21,11 +21,10 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace f = paddle::framework;
namespace p = paddle::platform;
......@@ -59,15 +58,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
// run
int axis = 1;
f::AttributeMap attrs = {
{"axis", axis},
{"use_cudnn", false},
{"use_mkldnn", false},
{"mkldnn_data_type", std::string("float32")},
{"is_test", false}, };
{"axis", axis}, {"use_cudnn", false},
{"use_mkldnn", false}, {"mkldnn_data_type", std::string("float32")},
{"is_test", false},
};
auto op =
f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
auto op = f::OpRegistry::CreateOp("softmax", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -76,7 +73,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TensorToVector(*tensor_out, ctx, &out_vec);
for (int i = 0; i < static_cast<int>(out_vec.size()); ++i) {
VLOG(3) << "out_vec[" << i << "] : "<< out_vec[i];
VLOG(3) << "out_vec[" << i << "] : " << out_vec[i];
}
ctx.Wait();
......@@ -84,7 +81,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
EXPECT_EQ((uint32_t)out_vec.size(), (uint32_t)(6));
}
template <typename T>
void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
// init
......@@ -128,16 +124,15 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
attrs = {
{"name", std::string("softmax_grad")},
{"axis", static_cast<int>(0)},
{"use_cudnn", false},
{"use_mkldnn", false},
{"mkldnn_data_type", std::string("float32")},
{"is_test", false},
{"data_format", std::string("AnyLayout")}, };
auto op =
f::OpRegistry::CreateOp("softmax_grad",
{{"Out", {"Out"}},
{"Out@GRAD", {"DOut"}}},
{{"X@GRAD", {"DX"}}}, attrs);
{"use_cudnn", false},
{"use_mkldnn", false},
{"mkldnn_data_type", std::string("float32")},
{"is_test", false},
{"data_format", std::string("AnyLayout")},
};
auto op = f::OpRegistry::CreateOp("softmax_grad",
{{"Out", {"Out"}}, {"Out@GRAD", {"DOut"}}},
{{"X@GRAD", {"DX"}}}, attrs);
auto place = ctx.GetPlace();
op->Run(*scope, place);
......@@ -164,12 +159,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(softmax, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
TEST(softmax_grad, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx);
}
......@@ -64,9 +64,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
axis.push_back(2);
f::AttributeMap attrs = {{"axes", axis}};
auto op =
f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
auto op = f::OpRegistry::CreateOp("squeeze", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -74,7 +73,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
EXPECT_EQ((uint32_t)tensor_out->dims().size(), uint32_t(2));
EXPECT_EQ((uint32_t)tensor_out->dims()[0], uint32_t(dim0));
EXPECT_EQ((uint32_t)tensor_out->dims()[1], uint32_t(dim1));
std::vector<T> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec);
for (uint32_t i = 0; i < out_vec.size(); i++) {
......@@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(squeeze, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
......@@ -126,12 +126,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(transpose2, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
TEST(transpose2_grad, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
CompareGrad<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
CompareGrad<float>(&scope, *ctx);
}
......@@ -63,9 +63,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
axis.push_back(1);
f::AttributeMap attrs = {{"axes", axis}};
auto op =
f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
auto op = f::OpRegistry::CreateOp("unsqueeze", {{"X", {"X"}}},
{{"Out", {"Out"}}}, attrs);
op->Run(*scope, place);
ctx.Wait();
......@@ -86,7 +85,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(unsqueeze, NPU_fp32) {
f::Scope scope;
p::NPUDeviceContext ctx(p::NPUPlace(0));
Compare<float>(&scope, ctx);
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
}
......@@ -254,7 +254,7 @@ NPUDeviceContext::~NPUDeviceContext() {
void NPUDeviceContext::Wait() const {
platform::RecordEvent record_event("NPUDeviceContext/wait");
VLOG(4) << "NPU context Wait";
VLOG(4) << "NPU context(" << this << ") Wait";
stream_->Wait();
}