diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 554cd58916c5a1ba09a411b4dc0b3a834ccc486a..c0523f3c795b103c0c27081ec5dc717f6a0f11e0 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -53,6 +53,7 @@ struct CastDataType { auto* context = static_cast(ctx_); trans(*context, in_begin, in_end, out_begin, CastDataTypeFunctor()); + context->Wait(); #endif } else { PADDLE_THROW("Unsupported place!"); diff --git a/paddle/fluid/framework/data_type_transform_test.cc b/paddle/fluid/framework/data_type_transform_test.cc index c992cba9a3611d50839a8ec056ee6ab954cd88b6..6b9a8f5e28b372c45abfaa2c20575a55d9a9dd03 100644 --- a/paddle/fluid/framework/data_type_transform_test.cc +++ b/paddle/fluid/framework/data_type_transform_test.cc @@ -50,13 +50,13 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_fp32, kernel_fp64, in, &out); double* out_data_double = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_double[i], static_cast(i / 3)); + EXPECT_EQ(out_data_double[i], static_cast(i / 3)); } TransDataType(kernel_fp32, kernel_int32, in, &out); int* out_data_int = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int[i], static_cast(i / 3)); + EXPECT_EQ(out_data_int[i], static_cast(i / 3)); } } @@ -76,31 +76,31 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_fp16, kernel_fp32, in, &out); float* out_data_float = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_float[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_float[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_fp64, in, &out); double* out_data_double = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_double[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_double[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_int32, in, &out); int* out_data_int = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_int[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_int64, in, &out); int64_t* out_data_int64 = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int64[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_int64[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_bool, in, &out); bool* out_data_bool = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_bool[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_bool[i], static_cast(ptr[i])); } // transform float to float16 @@ -112,7 +112,7 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_fp32, kernel_fp16, in, &out); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_float[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_float[i]).x); } // transform double to float16 @@ -124,7 +124,7 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_fp64, kernel_fp16, in, &out); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_double[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_double[i]).x); } // transform int to float16 @@ -136,7 +136,7 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_int32, kernel_fp16, in, &out); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_int[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_int[i]).x); } // transform int64 to float16 @@ -148,7 +148,7 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_int64, kernel_fp16, in, &out); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_int64[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_int64[i]).x); } // transform bool to float16 @@ -160,7 +160,7 @@ TEST(DataTypeTransform, CPUTransform) { TransDataType(kernel_bool, kernel_fp16, in, &out); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_bool[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_bool[i]).x); } } } diff --git a/paddle/fluid/framework/data_type_transform_test.cu b/paddle/fluid/framework/data_type_transform_test.cu index 3939bc5e754cd3c2829cfcb3353f83969af055a9..de389ddabcb86de0155757406a406e44086c5474 100644 --- a/paddle/fluid/framework/data_type_transform_test.cu +++ b/paddle/fluid/framework/data_type_transform_test.cu @@ -49,15 +49,16 @@ TEST(DataTypeTransform, GPUTransform) { float arr[6] = {0, 1, 2, 3, 4, 5}; int data_number = sizeof(arr) / sizeof(arr[0]); memcpy(in_ptr, arr, sizeof(arr)); - TensorCopy(in, gpu_place, context, &in_gpu); + TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_fp32, kernel_fp64, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); double* out_data_double = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_double[i], static_cast(arr[i])); + EXPECT_EQ(out_data_double[i], static_cast(arr[i])); } TransDataType(kernel_fp32, kernel_int32, in_gpu, &out_gpu); @@ -66,7 +67,7 @@ TEST(DataTypeTransform, GPUTransform) { int* out_data_int = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int[i], static_cast(arr[i])); + EXPECT_EQ(out_data_int[i], static_cast(arr[i])); } } @@ -83,6 +84,7 @@ TEST(DataTypeTransform, GPUTransform) { int data_number = sizeof(arr) / sizeof(arr[0]); memcpy(ptr, arr, sizeof(arr)); TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); // transform from float16 to other data types TransDataType(kernel_fp16, kernel_fp32, in_gpu, &out_gpu); @@ -91,7 +93,7 @@ TEST(DataTypeTransform, GPUTransform) { float* out_data_float = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_float[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_float[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_fp64, in_gpu, &out_gpu); @@ -100,7 +102,7 @@ TEST(DataTypeTransform, GPUTransform) { double* out_data_double = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_double[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_double[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_int32, in_gpu, &out_gpu); @@ -109,7 +111,7 @@ TEST(DataTypeTransform, GPUTransform) { int* out_data_int = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_int[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_int64, in_gpu, &out_gpu); @@ -118,7 +120,7 @@ TEST(DataTypeTransform, GPUTransform) { int64_t* out_data_int64 = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_int64[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_int64[i], static_cast(ptr[i])); } TransDataType(kernel_fp16, kernel_bool, in_gpu, &out_gpu); @@ -127,7 +129,7 @@ TEST(DataTypeTransform, GPUTransform) { bool* out_data_bool = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(out_data_bool[i], static_cast(ptr[i])); + EXPECT_EQ(out_data_bool[i], static_cast(ptr[i])); } // transform float to float16 @@ -137,13 +139,14 @@ TEST(DataTypeTransform, GPUTransform) { } TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_fp32, kernel_fp16, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_float[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_float[i]).x); } // transform double to float16 @@ -154,13 +157,14 @@ TEST(DataTypeTransform, GPUTransform) { } TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_fp64, kernel_fp16, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_double[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_double[i]).x); } // transform int to float16 @@ -170,13 +174,14 @@ TEST(DataTypeTransform, GPUTransform) { } TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_int32, kernel_fp16, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_int[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_int[i]).x); } // transform int64 to float16 @@ -187,13 +192,14 @@ TEST(DataTypeTransform, GPUTransform) { } TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_int64, kernel_fp16, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_int64[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_int64[i]).x); } // transform bool to float16 @@ -203,13 +209,14 @@ TEST(DataTypeTransform, GPUTransform) { } TensorCopy(in, gpu_place, context, &in_gpu); + context.Wait(); TransDataType(kernel_bool, kernel_fp16, in_gpu, &out_gpu); TensorCopy(out_gpu, cpu_place, context, &out); context.Wait(); ptr = out.data(); for (int i = 0; i < data_number; ++i) { - ASSERT_EQ(ptr[i].x, static_cast(in_data_bool[i]).x); + EXPECT_EQ(ptr[i].x, static_cast(in_data_bool[i]).x); } } }