Commit 7f00716c (unverified) · author: kexinzhao · committer: GitHub

Add context wait in type_transform (#8850)

Parent commit: 6f50dee4
...@@ -53,6 +53,7 @@ struct CastDataType { ...@@ -53,6 +53,7 @@ struct CastDataType {
auto* context = static_cast<const platform::CUDADeviceContext*>(ctx_); auto* context = static_cast<const platform::CUDADeviceContext*>(ctx_);
trans(*context, in_begin, in_end, out_begin, trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>()); CastDataTypeFunctor<InType, OutType>());
context->Wait();
#endif #endif
} else { } else {
PADDLE_THROW("Unsupported place!"); PADDLE_THROW("Unsupported place!");
......
...@@ -50,13 +50,13 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -50,13 +50,13 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_fp32, kernel_fp64, in, &out); TransDataType(kernel_fp32, kernel_fp64, in, &out);
double* out_data_double = out.data<double>(); double* out_data_double = out.data<double>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_double[i], static_cast<double>(i / 3)); EXPECT_EQ(out_data_double[i], static_cast<double>(i / 3));
} }
TransDataType(kernel_fp32, kernel_int32, in, &out); TransDataType(kernel_fp32, kernel_int32, in, &out);
int* out_data_int = out.data<int>(); int* out_data_int = out.data<int>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int[i], static_cast<int>(i / 3)); EXPECT_EQ(out_data_int[i], static_cast<int>(i / 3));
} }
} }
...@@ -76,31 +76,31 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -76,31 +76,31 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_fp16, kernel_fp32, in, &out); TransDataType(kernel_fp16, kernel_fp32, in, &out);
float* out_data_float = out.data<float>(); float* out_data_float = out.data<float>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_float[i], static_cast<float>(ptr[i])); EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_fp64, in, &out); TransDataType(kernel_fp16, kernel_fp64, in, &out);
double* out_data_double = out.data<double>(); double* out_data_double = out.data<double>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_double[i], static_cast<double>(ptr[i])); EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_int32, in, &out); TransDataType(kernel_fp16, kernel_int32, in, &out);
int* out_data_int = out.data<int>(); int* out_data_int = out.data<int>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int[i], static_cast<int>(ptr[i])); EXPECT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_int64, in, &out); TransDataType(kernel_fp16, kernel_int64, in, &out);
int64_t* out_data_int64 = out.data<int64_t>(); int64_t* out_data_int64 = out.data<int64_t>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i])); EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_bool, in, &out); TransDataType(kernel_fp16, kernel_bool, in, &out);
bool* out_data_bool = out.data<bool>(); bool* out_data_bool = out.data<bool>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_bool[i], static_cast<bool>(ptr[i])); EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
} }
// transform float to float16 // transform float to float16
...@@ -112,7 +112,7 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -112,7 +112,7 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_fp32, kernel_fp16, in, &out); TransDataType(kernel_fp32, kernel_fp16, in, &out);
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
} }
// transform double to float16 // transform double to float16
...@@ -124,7 +124,7 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -124,7 +124,7 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_fp64, kernel_fp16, in, &out); TransDataType(kernel_fp64, kernel_fp16, in, &out);
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
} }
// transform int to float16 // transform int to float16
...@@ -136,7 +136,7 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -136,7 +136,7 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_int32, kernel_fp16, in, &out); TransDataType(kernel_int32, kernel_fp16, in, &out);
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
} }
// transform int64 to float16 // transform int64 to float16
...@@ -148,7 +148,7 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -148,7 +148,7 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_int64, kernel_fp16, in, &out); TransDataType(kernel_int64, kernel_fp16, in, &out);
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
} }
// transform bool to float16 // transform bool to float16
...@@ -160,7 +160,7 @@ TEST(DataTypeTransform, CPUTransform) { ...@@ -160,7 +160,7 @@ TEST(DataTypeTransform, CPUTransform) {
TransDataType(kernel_bool, kernel_fp16, in, &out); TransDataType(kernel_bool, kernel_fp16, in, &out);
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
} }
} }
} }
...@@ -49,15 +49,16 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -49,15 +49,16 @@ TEST(DataTypeTransform, GPUTransform) {
float arr[6] = {0, 1, 2, 3, 4, 5}; float arr[6] = {0, 1, 2, 3, 4, 5};
int data_number = sizeof(arr) / sizeof(arr[0]); int data_number = sizeof(arr) / sizeof(arr[0]);
memcpy(in_ptr, arr, sizeof(arr)); memcpy(in_ptr, arr, sizeof(arr));
TensorCopy(in, gpu_place, context, &in_gpu);
TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_fp32, kernel_fp64, in_gpu, &out_gpu); TransDataType(kernel_fp32, kernel_fp64, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
double* out_data_double = out.data<double>(); double* out_data_double = out.data<double>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_double[i], static_cast<double>(arr[i])); EXPECT_EQ(out_data_double[i], static_cast<double>(arr[i]));
} }
TransDataType(kernel_fp32, kernel_int32, in_gpu, &out_gpu); TransDataType(kernel_fp32, kernel_int32, in_gpu, &out_gpu);
...@@ -66,7 +67,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -66,7 +67,7 @@ TEST(DataTypeTransform, GPUTransform) {
int* out_data_int = out.data<int>(); int* out_data_int = out.data<int>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int[i], static_cast<int>(arr[i])); EXPECT_EQ(out_data_int[i], static_cast<int>(arr[i]));
} }
} }
...@@ -83,6 +84,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -83,6 +84,7 @@ TEST(DataTypeTransform, GPUTransform) {
int data_number = sizeof(arr) / sizeof(arr[0]); int data_number = sizeof(arr) / sizeof(arr[0]);
memcpy(ptr, arr, sizeof(arr)); memcpy(ptr, arr, sizeof(arr));
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
// transform from float16 to other data types // transform from float16 to other data types
TransDataType(kernel_fp16, kernel_fp32, in_gpu, &out_gpu); TransDataType(kernel_fp16, kernel_fp32, in_gpu, &out_gpu);
...@@ -91,7 +93,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -91,7 +93,7 @@ TEST(DataTypeTransform, GPUTransform) {
float* out_data_float = out.data<float>(); float* out_data_float = out.data<float>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_float[i], static_cast<float>(ptr[i])); EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_fp64, in_gpu, &out_gpu); TransDataType(kernel_fp16, kernel_fp64, in_gpu, &out_gpu);
...@@ -100,7 +102,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -100,7 +102,7 @@ TEST(DataTypeTransform, GPUTransform) {
double* out_data_double = out.data<double>(); double* out_data_double = out.data<double>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_double[i], static_cast<double>(ptr[i])); EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_int32, in_gpu, &out_gpu); TransDataType(kernel_fp16, kernel_int32, in_gpu, &out_gpu);
...@@ -109,7 +111,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -109,7 +111,7 @@ TEST(DataTypeTransform, GPUTransform) {
int* out_data_int = out.data<int>(); int* out_data_int = out.data<int>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int[i], static_cast<int>(ptr[i])); EXPECT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_int64, in_gpu, &out_gpu); TransDataType(kernel_fp16, kernel_int64, in_gpu, &out_gpu);
...@@ -118,7 +120,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -118,7 +120,7 @@ TEST(DataTypeTransform, GPUTransform) {
int64_t* out_data_int64 = out.data<int64_t>(); int64_t* out_data_int64 = out.data<int64_t>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i])); EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
} }
TransDataType(kernel_fp16, kernel_bool, in_gpu, &out_gpu); TransDataType(kernel_fp16, kernel_bool, in_gpu, &out_gpu);
...@@ -127,7 +129,7 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -127,7 +129,7 @@ TEST(DataTypeTransform, GPUTransform) {
bool* out_data_bool = out.data<bool>(); bool* out_data_bool = out.data<bool>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(out_data_bool[i], static_cast<bool>(ptr[i])); EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
} }
// transform float to float16 // transform float to float16
...@@ -137,13 +139,14 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -137,13 +139,14 @@ TEST(DataTypeTransform, GPUTransform) {
} }
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_fp32, kernel_fp16, in_gpu, &out_gpu); TransDataType(kernel_fp32, kernel_fp16, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
} }
// transform double to float16 // transform double to float16
...@@ -154,13 +157,14 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -154,13 +157,14 @@ TEST(DataTypeTransform, GPUTransform) {
} }
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_fp64, kernel_fp16, in_gpu, &out_gpu); TransDataType(kernel_fp64, kernel_fp16, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
} }
// transform int to float16 // transform int to float16
...@@ -170,13 +174,14 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -170,13 +174,14 @@ TEST(DataTypeTransform, GPUTransform) {
} }
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_int32, kernel_fp16, in_gpu, &out_gpu); TransDataType(kernel_int32, kernel_fp16, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
} }
// transform int64 to float16 // transform int64 to float16
...@@ -187,13 +192,14 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -187,13 +192,14 @@ TEST(DataTypeTransform, GPUTransform) {
} }
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_int64, kernel_fp16, in_gpu, &out_gpu); TransDataType(kernel_int64, kernel_fp16, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
} }
// transform bool to float16 // transform bool to float16
...@@ -203,13 +209,14 @@ TEST(DataTypeTransform, GPUTransform) { ...@@ -203,13 +209,14 @@ TEST(DataTypeTransform, GPUTransform) {
} }
TensorCopy(in, gpu_place, context, &in_gpu); TensorCopy(in, gpu_place, context, &in_gpu);
context.Wait();
TransDataType(kernel_bool, kernel_fp16, in_gpu, &out_gpu); TransDataType(kernel_bool, kernel_fp16, in_gpu, &out_gpu);
TensorCopy(out_gpu, cpu_place, context, &out); TensorCopy(out_gpu, cpu_place, context, &out);
context.Wait(); context.Wait();
ptr = out.data<float16>(); ptr = out.data<float16>();
for (int i = 0; i < data_number; ++i) { for (int i = 0; i < data_number; ++i) {
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x); EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
} }
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.