From eb9e330548bb306ad272355550bf08cb61cd5ac6 Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Tue, 30 Nov 2021 14:51:36 +0800
Subject: [PATCH] Enabled performance benchmark tests for Eager Dygraph (#37653)

* Enabled performance benchmark tests for Eager Dygraph

* Protected CUDA tests with macro

* Fixed dependency issues for windows-ci
---
 paddle/fluid/eager/CMakeLists.txt             |  2 +-
 .../fluid/eager/api/generated/CMakeLists.txt  |  2 +-
 .../eager/auto_code_generator/CMakeLists.txt  | 33 +++++++++++++++++--
 paddle/fluid/eager/tests/CMakeLists.txt       |  4 +++
 .../tests/performance_tests/CMakeLists.txt    |  7 ++++
 .../performance_tests/benchmark_eager_cpu.cc  | 20 +++++------
 .../performance_tests/benchmark_eager_cuda.cc | 24 ++++++++------
 .../performance_tests/benchmark_fluid_cpu.cc  |  8 ++---
 .../performance_tests/benchmark_fluid_cuda.cc | 12 ++++---
 .../performance_tests/benchmark_utils.cc      | 28 +++++++++-------
 .../eager/tests/task_tests/CMakeLists.txt     |  2 +-
 11 files changed, 97 insertions(+), 45 deletions(-)
 create mode 100644 paddle/fluid/eager/tests/performance_tests/CMakeLists.txt

diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt
index e8cb55b7afe..d5abf639c83 100644
--- a/paddle/fluid/eager/CMakeLists.txt
+++ b/paddle/fluid/eager/CMakeLists.txt
@@ -2,7 +2,7 @@ set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward
 set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)
 set(generated_deps dygraph_function dygraph_node)
 
-if(NOT DEFINED ON_INFER)
+if(NOT ON_INFER)
   message("Performing Eager Dygraph Auto Code Generation")
   add_subdirectory(auto_code_generator)
 endif()
diff --git a/paddle/fluid/eager/api/generated/CMakeLists.txt b/paddle/fluid/eager/api/generated/CMakeLists.txt
index 407a8d69e52..ebbef286f79 100644
--- a/paddle/fluid/eager/api/generated/CMakeLists.txt
+++ b/paddle/fluid/eager/api/generated/CMakeLists.txt
@@ -1,5 +1,5 @@
 add_subdirectory(eager_generated)
 
-if(NOT DEFINED ON_INFER)
+if(NOT ON_INFER)
   add_subdirectory(fluid_generated)
 endif()
diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt
index 5d31c9139ba..03cec80b682 100644
--- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt
+++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt
@@ -17,9 +17,38 @@ execute_process(
 )
 
 if(WIN32)
+  set(EAGER_CODEGEN_DEPS eager_generator)
+  if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
+    set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}")
+  else()
+    set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
+  endif()
+
+  if(${CBLAS_PROVIDER} STREQUAL MKLML)
+    message("Copied libiomp5md.dll for Eager AutoCodeGen")
+    ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/libiomp5md.dll
+      COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${eager_generator_path}
+      DEPENDS mklml)
+    list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/libiomp5md.dll)
+  else(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS)
+    message("Copied openblas.dll for Eager AutoCodeGen")
+    ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/openblas.dll
+      COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_SHARED_LIB} ${eager_generator_path}
+      DEPENDS extern_openblas)
+    list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/openblas.dll)
+  endif()
+
+  if(WITH_MKLDNN)
+    message("Copied mkldnn.dll for Eager AutoCodeGen")
+    ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/mkldnn.dll
+      COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${eager_generator_path}
+      DEPENDS mkldnn)
+    list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/mkldnn.dll)
+  endif()
+
   add_custom_target(eager_codegen
-    COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
-    DEPENDS eager_generator
+    COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
+    DEPENDS ${EAGER_CODEGEN_DEPS}
     VERBATIM)
 else()
   add_custom_target(eager_codegen
diff --git a/paddle/fluid/eager/tests/CMakeLists.txt b/paddle/fluid/eager/tests/CMakeLists.txt
index 289f24dfa63..c1506d8139b 100644
--- a/paddle/fluid/eager/tests/CMakeLists.txt
+++ b/paddle/fluid/eager/tests/CMakeLists.txt
@@ -1,2 +1,6 @@
 add_subdirectory(data_structure_tests)
 add_subdirectory(task_tests)
+
+if(NOT ON_INFER)
+  add_subdirectory(performance_tests)
+endif()
diff --git a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt
new file mode 100644
index 00000000000..8811aa8ad38
--- /dev/null
+++ b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt
@@ -0,0 +1,7 @@
+cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op)
+
+cc_test(test_egr_performance_benchmark_eager_cpu SRCS benchmark_eager_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
+cc_test(test_egr_performance_benchmark_fluid_cpu SRCS benchmark_fluid_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
+
+cc_test(test_egr_performance_benchmark_eager_cuda SRCS benchmark_eager_cuda.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
+cc_test(test_egr_performance_benchmark_fluid_cuda SRCS benchmark_fluid_cuda.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps})
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
index 0a84f3b523a..0637ff2bb23 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -25,7 +25,7 @@
 
 #include "paddle/fluid/imperative/tracer.h"
 
-#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
+#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 
 #ifdef WITH_GPERFTOOLS
@@ -42,11 +42,11 @@ TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; }
 
 TEST(Benchmark, EagerScaleCPU) {
   // Prepare Device Contexts
-  egr::InitEnv(paddle::platform::CPUPlace());
+  eager_test::InitEnv(paddle::platform::CPUPlace());
 
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4});
-    egr::EagerTensor tensor = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor tensor = CreateTensorWithValue(
         ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 5.0, true);
     RetainGradForTensor(tensor);
@@ -78,20 +78,20 @@ TEST(Benchmark, EagerScaleCPU) {
 
 TEST(Benchmark, EagerIntermediateMatmulCPU) {
   // Prepare Device Contexts
-  InitEnv(paddle::platform::CPUPlace());
+  eager_test::InitEnv(paddle::platform::CPUPlace());
 
   auto tracer = std::make_shared<paddle::imperative::Tracer>();
   paddle::imperative::SetCurrentTracer(tracer);
 
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2});
-    egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor X = CreateTensorWithValue(
         ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 1.0, true);
     RetainGradForTensor(X);
 
     paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2});
-    egr::EagerTensor Y = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor Y = CreateTensorWithValue(
         ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 2.0, true);
     RetainGradForTensor(Y);
@@ -122,7 +122,7 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) {
 
 TEST(Benchmark, EagerIntermediateMLPCPU) {
   // Prepare Device Contexts
-  InitEnv(paddle::platform::CPUPlace());
+  eager_test::InitEnv(paddle::platform::CPUPlace());
 
   auto tracer = std::make_shared<paddle::imperative::Tracer>();
   paddle::imperative::SetCurrentTracer(tracer);
@@ -130,7 +130,7 @@
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     paddle::framework::DDim ddimX =
         paddle::framework::make_ddim({MLP_M, MLP_N});
-    egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor X = CreateTensorWithValue(
         ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, MLP_X_VAL, true);
     RetainGradForTensor(X);
@@ -140,13 +140,13 @@
     for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
       paddle::framework::DDim ddimW =
           paddle::framework::make_ddim({MLP_N, MLP_K});
-      egr::EagerTensor W = EagerUtils::CreateTensorWithValue(
+      egr::EagerTensor W = CreateTensorWithValue(
           ddimW, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
           pten::DataLayout::NCHW, MLP_W_VAL, true);
       RetainGradForTensor(W);
 
       paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K});
-      egr::EagerTensor B = EagerUtils::CreateTensorWithValue(
+      egr::EagerTensor B = CreateTensorWithValue(
          ddimB, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
          pten::DataLayout::NCHW, MLP_B_VAL, true);
       RetainGradForTensor(B);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index b373802c79e..96dff14184f 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -24,7 +24,7 @@
 
 #include "paddle/fluid/imperative/tracer.h"
 
-#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
+#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 
 #ifdef WITH_GPERFTOOLS
@@ -38,12 +38,14 @@ DECLARE_bool(run_pten_kernel);
 
 TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; }
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+
 TEST(Benchmark, EagerScaleCUDA) {
-  egr::InitEnv(paddle::platform::CUDAPlace());
+  eager_test::InitEnv(paddle::platform::CUDAPlace());
 
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4});
-    egr::EagerTensor tensor = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor tensor = CreateTensorWithValue(
         ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
     RetainGradForTensor(tensor);
@@ -77,7 +79,7 @@ TEST(Benchmark, EagerScaleCUDA) {
 
 TEST(Benchmark, EagerIntermediateMatmulCUDA) {
   paddle::platform::CUDAPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   auto tracer = std::make_shared<paddle::imperative::Tracer>();
   tracer->SetExpectedPlace(place);
@@ -85,13 +87,13 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) {
 
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2});
-    egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor X = CreateTensorWithValue(
         ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 1.0, true);
     RetainGradForTensor(X);
 
     paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2});
-    egr::EagerTensor Y = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor Y = CreateTensorWithValue(
         ddimY, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, 2.0, true);
     RetainGradForTensor(Y);
@@ -125,7 +127,7 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) {
 
 TEST(Benchmark, EagerIntermediateMLPCUDA) {
   paddle::platform::CUDAPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   auto tracer = std::make_shared<paddle::imperative::Tracer>();
   tracer->SetExpectedPlace(place);
@@ -134,7 +136,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     paddle::framework::DDim ddimX =
         paddle::framework::make_ddim({MLP_M, MLP_N});
-    egr::EagerTensor X = EagerUtils::CreateTensorWithValue(
+    egr::EagerTensor X = CreateTensorWithValue(
         ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
         pten::DataLayout::NCHW, MLP_X_VAL, true);
     RetainGradForTensor(X);
@@ -144,13 +146,13 @@
     for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
       paddle::framework::DDim ddimW =
           paddle::framework::make_ddim({MLP_N, MLP_K});
-      egr::EagerTensor W = EagerUtils::CreateTensorWithValue(
+      egr::EagerTensor W = CreateTensorWithValue(
          ddimW, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
          pten::DataLayout::NCHW, MLP_W_VAL, true);
       RetainGradForTensor(W);
 
       paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K});
-      egr::EagerTensor B = EagerUtils::CreateTensorWithValue(
+      egr::EagerTensor B = CreateTensorWithValue(
          ddimB, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
          pten::DataLayout::NCHW, MLP_B_VAL, true);
       RetainGradForTensor(B);
@@ -185,3 +187,5 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
     }
   }
 }
+
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
index 20844055e30..d98000b71fd 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -24,7 +24,7 @@
 #include "glog/logging.h"
 #include "gtest/gtest.h"
 
-#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
+#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 #include "paddle/fluid/imperative/basic_engine.h"
 #include "paddle/fluid/imperative/tracer.h"
@@ -45,7 +45,7 @@ namespace imperative {
 TEST(Benchmark, FluidScaleCPU) {
   // Prepare Device Contexts
   platform::CPUPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
@@ -88,7 +88,7 @@ TEST(Benchmark, FluidScaleCPU) {
 TEST(Benchmark, FluidMatmulCPU) {
   // Prepare Device Contexts
   platform::CPUPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
@@ -141,7 +141,7 @@ TEST(Benchmark, FluidMatmulCPU) {
 TEST(Benchmark, FluidMLPCPU) {
   // Prepare Device Contexts
   platform::CPUPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "Performance"}) {
     std::vector<float> x_src_data(MLP_M * MLP_N, MLP_X_VAL);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index 620a4d1cd12..918ebadba0a 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -24,7 +24,7 @@
 #include "glog/logging.h"
 #include "gtest/gtest.h"
 
-#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
+#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
 #include "paddle/fluid/eager/tests/test_utils.h"
 #include "paddle/fluid/imperative/basic_engine.h"
 #include "paddle/fluid/imperative/tracer.h"
@@ -39,13 +39,15 @@ DECLARE_bool(run_pten_kernel);
 
 TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; }
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+
 namespace paddle {
 namespace imperative {
 
 TEST(Benchmark, FluidScaleCUDA) {
   // Prepare Device Contexts
   platform::CUDAPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
@@ -98,7 +100,7 @@ TEST(Benchmark, FluidScaleCUDA) {
 TEST(Benchmark, FluidMatmulCUDA) {
   // Prepare Device Contexts
   platform::CUDAPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     std::shared_ptr<imperative::VarBase> X(new imperative::VarBase(true, "X"));
@@ -161,7 +163,7 @@ TEST(Benchmark, FluidMatmulCUDA) {
 TEST(Benchmark, FluidMLPCUDA) {
   // Prepare Device Contexts
   platform::CUDAPlace place;
-  egr::InitEnv(place);
+  eager_test::InitEnv(place);
 
   for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
     paddle::platform::DeviceContextPool& pool =
@@ -252,3 +254,5 @@ USE_OP(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
+
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
index ae5d02c1e94..baa99dc93c2 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
@@ -36,10 +36,6 @@
 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/memory/memcpy.h"
 
-#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
-
-#include "paddle/pten/core/kernel_registry.h"
-
 static size_t max_num_benchmark_runs = 5000;
 
 namespace egr {
@@ -64,9 +60,9 @@ void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) {
 
   if (accuracy_check) {
     // Examine Forward Grad (w.r.t max_num_runs = 10)
-    CompareTensorWithValue(input_tensor, 8189.0);
+    eager_test::CompareTensorWithValue(input_tensor, 8189.0);
     // Examine Backward Grad (w.r.t max_num_runs = 10)
-    CompareGradTensorWithValue(tensor, 1024.0);
+    eager_test::CompareGradTensorWithValue(tensor, 1024.0);
   }
 }
 
@@ -89,10 +85,10 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X,
 
   if (accuracy_check) {
     // Examine Forward Grad (w.r.t max_num_runs = 2)
-    CompareVariableWithValue(input_tensor0, 16);
+    eager_test::CompareVariableWithValue(input_tensor0, 16);
     // Examine Backward Grad (w.r.t max_num_runs = 2)
-    CompareGradVariableWithValue(X, 16);
-    CompareGradVariableWithValue(Y, 16);
+    eager_test::CompareGradVariableWithValue(X, 16);
+    eager_test::CompareGradVariableWithValue(Y, 16);
   }
 }
 
@@ -122,11 +118,11 @@ void benchmark_eager_intermediate_mlp(const EagerTensor& X,
         compute_mlp_expected_results();
 
     // Examine Forward Grad (w.r.t max_num_runs = 2)
-    CompareVariableWithValue(Out, result["Out"]);
+    eager_test::CompareVariableWithValue(Out, result["Out"]);
 
     // Examine Backward Grad (w.r.t max_num_runs = 2)
-    CompareGradVariableWithValue(X, result["GradX"]);
-    CompareGradVariableWithValue(Ws[0], result["GradW"]);
+    eager_test::CompareGradVariableWithValue(X, result["GradX"]);
+    eager_test::CompareGradVariableWithValue(Ws[0], result["GradW"]);
   }
 }
 
@@ -141,6 +137,8 @@ static void FluidCheckTensorValue(const std::shared_ptr<imperative::VarBase>& X,
   auto* tensor = X->MutableVar()->GetMutable<framework::LoDTensor>();
   float* t_ptr = tensor->mutable_data<float>(place);
   std::vector<float> host_data(tensor->numel());
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (place == paddle::platform::CUDAPlace()) {
     paddle::platform::DeviceContextPool& pool =
         paddle::platform::DeviceContextPool::Instance();
@@ -153,6 +151,8 @@ static void FluidCheckTensorValue(const std::shared_ptr<imperative::VarBase>& X,
         sizeof(float) * tensor->numel(), stream);
     t_ptr = host_data.data();
   }
+#endif
+
   VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value;
   PADDLE_ENFORCE(
       t_ptr[0] == value,
@@ -166,6 +166,8 @@ static void FluidCheckGradTensorValue(
   auto* grad_tensor = X->MutableGradVar()->GetMutable<framework::LoDTensor>();
   float* g_ptr = grad_tensor->mutable_data<float>(place);
   std::vector<float> g_host_data(grad_tensor->numel());
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (place == paddle::platform::CUDAPlace()) {
     paddle::platform::DeviceContextPool& pool =
         paddle::platform::DeviceContextPool::Instance();
@@ -178,6 +180,8 @@ static void FluidCheckGradTensorValue(
         sizeof(float) * grad_tensor->numel(), stream);
     g_ptr = g_host_data.data();
   }
+#endif
+
   VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value;
   PADDLE_ENFORCE(
       g_ptr[0] == value,
diff --git a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
index 3921ce5b69c..c03db1a1575 100644
--- a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+++ b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
@@ -6,6 +6,6 @@ cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} ea
 cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 
-if(NOT DEFINED ON_INFER)
+if(NOT ON_INFER)
   cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
 endif()
-- 
GitLab