From b1a458aca3a88828f0e7a021b6166707594cf13e Mon Sep 17 00:00:00 2001 From: houj04 <35131887+houj04@users.noreply.github.com> Date: Wed, 26 Jan 2022 11:08:15 +0800 Subject: [PATCH] fix gradient accumulator bug. test=kunlun (#39127) * fix gradient accumulator bug. test=kunlun * fix typo. test=kunlun * fix typo. test=kunlun * fix unit tests. test=kunlun * using TensorCopySync. test=kunlun * only fix for xpu place. test=kunlun --- .../fluid/imperative/gradient_accumulator.cc | 7 ++ .../tests/test_gradient_accmulator.cc | 92 ++++++++++++++----- 2 files changed, 77 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index e075e11a08..9ae8b75075 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -243,6 +243,13 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) { "should be equal, Otherwise, the calculation results " "will be incorrect.")); +#ifdef PADDLE_WITH_XPU + // if src and dst are in different place, copy dst to src's place + if (dst_tensor->place() != place) { + paddle::framework::TensorCopySync(*dst_tensor, place, dst_tensor); + } +#endif + #define PADDLE_TENSOR_ADD(cpp_type) \ if (data_type == framework::DataTypeTrait::DataType()) { \ TensorAddFunctor func( \ diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index 6cf5585ba8..25ffab4706 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -15,6 +15,7 @@ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/gradient_accumulator.h" @@ -29,8 +30,8 @@ namespace imperative { void TensorAdd(const framework::Variable& src, framework::Variable* dst); -template -int TensorddTest(Place place, T t1, T t2) { +template +int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) { framework::Variable var1; framework::Variable var2; std::vector src_data(10, t1); @@ -46,18 +47,25 @@ int TensorddTest(Place place, T t1, T t2) { auto* dst = var2.GetMutable(); src->Resize(framework::make_ddim(dims)); dst->Resize(framework::make_ddim(dims)); - auto* src_mutable = src->mutable_data(place); - auto* dst_mutable = dst->mutable_data(place); - if (!std::is_same::value) { - paddle::memory::Copy(place, src_mutable, src_place, src_data.data(), + auto* src_mutable = src->mutable_data(place1); + auto* dst_mutable = dst->mutable_data(place2); + + if (!std::is_same::value) { + paddle::memory::Copy(place1, src_mutable, src_place, src_data.data(), sizeof(T) * src_data.size()); - paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(), - sizeof(T) * dst_data.size()); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) } else { - paddle::memory::Copy(place, src_mutable, src_place, src_data.data(), + paddle::memory::Copy(place1, src_mutable, src_place, src_data.data(), sizeof(T) * src_data.size(), 0); - paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(), +#endif + } + + if (!std::is_same::value) { + paddle::memory::Copy(place2, dst_mutable, src_place, dst_data.data(), + sizeof(T) * dst_data.size()); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else { + paddle::memory::Copy(place2, dst_mutable, src_place, dst_data.data(), sizeof(T) * dst_data.size(), 0); #endif } @@ -80,25 +88,64 @@ TEST(test_add_functor, add_functor) { platform::CPUPlace cpu_place; int cpu_res = 1; - cpu_res = TensorddTest(cpu_place, 1.0, 0.0); + + // float32 + cpu_res = TensorddTest(cpu_place, cpu_place, static_cast(1.0), + static_cast(2.0)); EXPECT_EQ(cpu_res, 0); - cpu_res = TensorddTest(cpu_place, static_cast(1.0), - static_cast(2.0)); + // float16 + cpu_res = + TensorddTest(cpu_place, cpu_place, static_cast(1.0), + static_cast(2.0)); EXPECT_EQ(cpu_res, 0); - cpu_res = TensorddTest(cpu_place, static_cast(1.0), - static_cast(2.0)); + +#ifndef PADDLE_WITH_XPU + // does not support double when compiled using xpu + cpu_res = TensorddTest(cpu_place, cpu_place, static_cast(1.0), + static_cast(2.0)); EXPECT_EQ(cpu_res, 0); +#endif + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) int gpu_res = 1; - gpu_res = TensorddTest(gpu_place, 1.0, 0.0); + gpu_res = TensorddTest(gpu_place, gpu_place, 1.0, 0.0); EXPECT_EQ(gpu_res, 0); - gpu_res = TensorddTest(gpu_place, static_cast(1.0), + gpu_res = TensorddTest(gpu_place, gpu_place, static_cast(1.0), static_cast(2.0)); EXPECT_EQ(gpu_res, 0); - gpu_res = TensorddTest(gpu_place, static_cast(1.0), - static_cast(2.0)); + gpu_res = + TensorddTest(gpu_place, gpu_place, static_cast(1.0), + static_cast(2.0)); EXPECT_EQ(gpu_res, 0); #endif + +#ifdef PADDLE_WITH_XPU + platform::XPUPlace xpu_place(0); + int xpu_res = 1; + // normal + xpu_res = TensorddTest(xpu_place, xpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); + xpu_res = + TensorddTest(xpu_place, xpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); + // different places + xpu_res = TensorddTest(cpu_place, xpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); + xpu_res = TensorddTest(xpu_place, cpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); + xpu_res = + TensorddTest(cpu_place, xpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); + xpu_res = + TensorddTest(xpu_place, cpu_place, static_cast(1.0), + static_cast(2.0)); + EXPECT_EQ(xpu_res, 0); +#endif } TEST(test_add_functor, execption) { @@ -106,10 +153,11 @@ TEST(test_add_functor, execption) { platform::CUDAPlace cuda_place(0); platform::CPUPlace cpu_place; - ASSERT_ANY_THROW(TensorddTest(cpu_place, 1, 0)); + ASSERT_ANY_THROW(TensorddTest(cpu_place, cpu_place, 1, 0)); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - ASSERT_ANY_THROW(TensorddTest(cuda_pinned_place, 1.0, 0.0)); - ASSERT_ANY_THROW(TensorddTest(cuda_pinned_place, + ASSERT_ANY_THROW( + TensorddTest(cuda_pinned_place, cuda_pinned_place, 1.0, 0.0)); + ASSERT_ANY_THROW(TensorddTest(cuda_pinned_place, cuda_pinned_place, static_cast(1.0), static_cast(2.0))); #endif -- GitLab