// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>

#include "gtest/gtest.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"

PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid_grad, CPU, ALL_LAYOUT);

namespace egr {

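// Gradient hook shared by the tests below: it allocates a new DenseTensor
// with the same meta as the incoming gradient and returns a copy with 3.0
// added to every element.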
paddle::experimental::Tensor hook_function(
    const paddle::experimental::Tensor& t) {
  auto t_dense = std::dynamic_pointer_cast<phi::DenseTensor>(t.impl());

  auto ret_meta = phi::DenseTensorMeta(
      t_dense->dtype(), t_dense->dims(), t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<phi::DenseTensor>(
      paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));

  float* t_ptr = t_dense->mutable_data<float>(place);
  float* ret_ptr = ret_dense->mutable_data<float>(place);
  for (int i = 0; i < ret_dense->numel(); i++) {
    ret_ptr[i] = t_ptr[i] + 3.0;
  }

  auto ret_impl = std::dynamic_pointer_cast<phi::TensorBase>(ret_dense);
  paddle::experimental::Tensor ret = paddle::experimental::Tensor();
  ret.set_impl(ret_impl);

  return ret;
}

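// Runs sigmoid on a zero-filled tensor: the forward value is 0.5 and the
// retained gradient is sigmoid'(0) = 0.25, or 0.25 + 3.0 while the gradient
// hook stays registered. The reduce hook overwrites the input with 100.0.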
void test_sigmoid(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make Dim";
  paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});

  VLOG(6) << "Make paddle::experimental::Tensor";
  paddle::experimental::Tensor tensor =
      egr_utils_api::CreateTensorWithValue(ddim,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           0.0,
                                           true);

  VLOG(6) << "Make ReduceHook function";
  auto reduce_hook = [&](void) -> void {
    auto* t_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
                      ->data<float>();
    for (int i = 0; i < tensor.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  VLOG(6) << "Retain Grad for Tensor";
  egr_utils_api::RetainGradForTensor(tensor);

  VLOG(6) << "Register GradientHook for Tensor";
  int64_t hook_id =
      egr_utils_api::RegisterGradientHookForTensor(tensor, hook_function);

  VLOG(6) << "Register ReduceHook for Tensor";
  egr_utils_api::RegisterReduceHookForTensor(tensor, reduce_hook);

  VLOG(6) << "Running Forward";
  auto output_tensor = sigmoid_dygraph_function(tensor, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);

  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(tensor);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  VLOG(6) << "Running Backward";
  Backward(target_tensors, {});
  VLOG(6) << "Finish Backward";

  eager_test::CompareGradTensorWithValue<float>(
      tensor, is_remove_gradient_hook ? 0.25 : 0.25 + 3.0);

  VLOG(6) << "Checking ReduceHook results";
  for (int i = 0; i < tensor.numel(); i++) {
    CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
                 ->data<float>()[i],
             static_cast<float>(100.0f));
  }
  VLOG(6) << "After Tests";
}

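// Adds X (4x16, value 3.0) and Y (4x16, value 2.0): the output is 5.0 per
// element and both input gradients are 1.0, with the gradient hook lifting
// Y's gradient to 4.0 unless it is removed before backward.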
void test_elementwiseAdd(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
  paddle::experimental::Tensor X =
      egr_utils_api::CreateTensorWithValue(ddimX,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           3.0,
                                           true);
  egr_utils_api::RetainGradForTensor(X);

  paddle::framework::DDim ddimY = phi::make_ddim({4, 16});
  paddle::experimental::Tensor Y =
      egr_utils_api::CreateTensorWithValue(ddimY,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           2.0,
                                           true);

  auto reduce_hook = [&]() -> void {
    auto* t_ptr =
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  egr_utils_api::RetainGradForTensor(Y);
  int64_t hook_id =
      egr_utils_api::RegisterGradientHookForTensor(Y, hook_function);
  egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);

  auto output_tensor = elementwise_add_dygraph_function(X, Y, {});

  eager_test::CompareTensorWithValue<float>(output_tensor, 5);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  Backward(target_tensors, {});

  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 1.0 : 1.0 + 3.0);

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
  }
}

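// Multiplies X (4x16, value 3.0) by Y (16x20, value 2.0): each output element
// is 96, dX is 2.0 * 20 and dY is 3.0 * 4, with the gradient hook adding 3 to
// dY unless it is removed before backward.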
void test_matmul(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
  paddle::experimental::Tensor X =
      egr_utils_api::CreateTensorWithValue(ddimX,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           3.0,
                                           true);
  egr_utils_api::RetainGradForTensor(X);

  paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
  paddle::experimental::Tensor Y =
      egr_utils_api::CreateTensorWithValue(ddimY,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           2.0,
                                           true);

  auto reduce_hook = [&](void) -> void {
    auto* t_ptr =
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  egr_utils_api::RetainGradForTensor(Y);
  int64_t hook_id =
      egr_utils_api::RegisterGradientHookForTensor(Y, hook_function);
  egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);

  auto output_tensor = matmul_v2_dygraph_function(
      X, Y, {{"trans_x", false}, {"trans_y", false}});

  eager_test::CompareTensorWithValue<float>(output_tensor, 96);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  Backward(target_tensors, {});

  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 3.0 * 4 : 3.0 * 4 + 3);

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
  }
}

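// Registers a backward-final hook that overwrites X with 100.0 once backward
// finishes: X still holds 3.0 right after the forward pass and reads 100.0
// after Backward() returns.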
void test_backward_final_hooks() {
  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make paddle::experimental::Tensor";
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
  paddle::experimental::Tensor X =
      egr_utils_api::CreateTensorWithValue(ddimX,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           3.0,
                                           true);
  paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
  egr_utils_api::RetainGradForTensor(X);

  paddle::experimental::Tensor Y =
      egr_utils_api::CreateTensorWithValue(ddimY,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           2.0,
                                           true);

  VLOG(6) << "Make ReduceHook function";
  auto backward_final_hook = [&](void) -> void {
    auto* t_ptr =
        std::dynamic_pointer_cast<phi::DenseTensor>(X.impl())->data<float>();
    VLOG(6) << "Run Target Backward Hook";
    for (int i = 0; i < X.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };
  VLOG(6) << "Register Backward Final Hook";
  egr_utils_api::RegisterBackwardFinalHook(backward_final_hook);

  VLOG(6) << "Runing Forward";
  auto output_tensor = matmul_v2_dygraph_function(
      X, Y, {{"trans_x", false}, {"trans_y", false}});
  auto res = sigmoid_dygraph_function(output_tensor, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(X, 3.0);

  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  VLOG(6) << "Runing Backward";
  Backward(target_tensors, {});
  VLOG(6) << "Finish Backward";
  eager_test::CompareTensorWithValue<float>(X, 100.0);
}

TEST(Hook_intermidiate, Sigmoid) {
  // True or false represents whether to call RemoveGradientHook
  test_sigmoid(true);
  test_sigmoid(false);
}

TEST(Hook_intermidiate, ElementwiseAdd) {
  test_elementwiseAdd(true);
  test_elementwiseAdd(false);
}

TEST(Hook_intermidiate, Matmul_v2) {
  test_matmul(true);
  test_matmul(false);
}

TEST(Hook_intermidiate, BackwardFinal) { test_backward_final_hooks(); }
}  // namespace egr

USE_OP_ITSELF(sigmoid);
USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(matmul_v2);