diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index d34bc92594dca06440d3ae9dc1ab351914e210d0..bae11905b78f37e46feffeae2996a8802f91880e 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -11,3 +11,6 @@ else()
     cc_library(math_function SRCS math_function.cc DEPS cblas device_context)
   endif()
 endif()
+
+
+nv_test(math_function_test SRCS math_function_test.cc DEPS math_function)
diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f7b453a20c0649edd1421f6649088f891843b250
--- /dev/null
+++ b/paddle/operators/math/math_function_test.cc
@@ -0,0 +1,40 @@
+#include "paddle/operators/math/math_function.h"
+#include "gtest/gtest.h"
+#include <cstring>  // memcpy
+
+#ifndef PADDLE_ONLY_CPU
+TEST(math_function, GPU) {
+  paddle::framework::Tensor input1;
+  paddle::framework::Tensor input1_gpu;
+  paddle::framework::Tensor input2_gpu;
+  paddle::framework::Tensor out_gpu;
+  paddle::framework::Tensor out;
+
+  // Fill a 2x2 CPU tensor with the values 0, 1, 2, 3.
+  auto* cpu_place = new paddle::platform::CPUPlace();
+  float* input1_ptr = input1.mutable_data<float>({2, 2}, *cpu_place);
+  float arr[4] = {0, 1, 2, 3};
+  memcpy(input1_ptr, arr, 4 * sizeof(float));
+
+  auto* gpu_place = new paddle::platform::GPUPlace(0);
+  paddle::platform::DeviceContext* context =
+      new paddle::platform::CUDADeviceContext(*gpu_place);
+
+  // The same matrix is used for both operands; copying it into out_gpu also sets the 2x2 output shape.
+  input1_gpu.CopyFrom<float>(input1, *gpu_place);
+  input2_gpu.CopyFrom<float>(input1, *gpu_place);
+  out_gpu.CopyFrom<float>(input1, *gpu_place);
+
+  // out_gpu = 1 * input1_gpu * input2_gpu + 0 * out_gpu
+  paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
+      input1_gpu, false, input2_gpu, false, 1, &out_gpu, 0, context);
+
+  out.CopyFrom<float>(out_gpu, *cpu_place);
+
+  float* out_ptr = out.data<float>();
+  EXPECT_EQ(out_ptr[0], 2);
+  EXPECT_EQ(out_ptr[1], 3);
+  EXPECT_EQ(out_ptr[2], 6);
+  EXPECT_EQ(out_ptr[3], 11);
+}
+#endif
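For reference, the values asserted by the EXPECT_EQ calls follow from multiplying the 2x2 input matrix {{0, 1}, {2, 3}} by itself with alpha = 1 and beta = 0. The standalone sketch below is not part of the patch and has no Paddle dependency; it only reproduces the same arithmetic on the CPU to show where 2, 3, 6, 11 come from.

#include <cstdio>

// Multiply the 2x2 row-major matrix {0, 1, 2, 3} by itself, mirroring the
// alpha = 1, beta = 0 matmul the test runs on the GPU.
int main() {
  const float a[2][2] = {{0, 1}, {2, 3}};
  float c[2][2] = {};

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 2; ++k) {
        c[i][j] += a[i][k] * a[k][j];
      }
    }
  }

  // Prints 2 3 6 11 -- the values checked by the EXPECT_EQ calls above.
  std::printf("%g %g %g %g\n", c[0][0], c[0][1], c[1][0], c[1][1]);
  return 0;
}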