// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>

#include "gtest/gtest.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"

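// CPU kernels exercised by the dygraph forward/backward functions in these
// tests; declaring them here ensures their registrations are linked in.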
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid_grad, CPU, ALL_LAYOUT);

namespace egr {

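// Gradient hook shared by the tests below: it copies the incoming gradient
// into a new DenseTensor and adds 3.0 to every element, so the expected
// gradient values are offset by 3.0 whenever this hook is still registered.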
paddle::experimental::Tensor hook_function(
    const paddle::experimental::Tensor& t) {
  auto t_dense = std::dynamic_pointer_cast<phi::DenseTensor>(t.impl());

  auto ret_meta = phi::DenseTensorMeta(
      t_dense->dtype(), t_dense->dims(), t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<phi::DenseTensor>(
      paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));

  float* t_ptr = t_dense->mutable_data<float>(place);
  float* ret_ptr = ret_dense->mutable_data<float>(place);
  for (int i = 0; i < ret_dense->numel(); i++) {
    ret_ptr[i] = t_ptr[i] + 3.0;
  }

  auto ret_impl = std::dynamic_pointer_cast<phi::TensorBase>(ret_dense);
  paddle::experimental::Tensor ret = paddle::experimental::Tensor();
  ret.set_impl(ret_impl);

  return ret;
}

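// Forward/backward through sigmoid with a gradient hook and a reduce hook on
// the input. The input is filled with 0.0, so sigmoid'(0) = 0.25; the gradient
// hook adds 3.0 unless it was removed before Backward.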
void test_sigmoid(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make Dim";
  paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});

  VLOG(6) << "Make paddle::experimental::Tensor";
  paddle::experimental::Tensor tensor =
      egr_utils_api::CreateTensorWithValue(ddim,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           0.0,
                                           true);

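  // Reduce hook: overwrites the input tensor's data with 100.0; the values
  // are verified at the end of the test, after Backward has run.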
  VLOG(6) << "Make ReduceHook function";
  auto reduce_hook = [&](void) -> void {
    auto* t_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
                      ->data<float>();
    for (int i = 0; i < tensor.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  VLOG(6) << "Retain Grad for Tensor";
  egr_utils_api::RetainGradForTensor(tensor);

  VLOG(6) << "Register GradientHook for Tensor";
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      tensor, std::make_shared<CppTensorHook>(hook_function));

  VLOG(6) << "Register ReduceHook for Tensor";
  egr_utils_api::RegisterReduceHookForTensor(
      tensor, std::make_shared<CppTensorVoidHook>(reduce_hook));

  VLOG(6) << "Runing Forward";
  auto output_tensor = sigmoid_dygraph_function(tensor, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);

  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(tensor);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  VLOG(6) << "Runing Backward";
  Backward(target_tensors, {});
  VLOG(6) << "Finish Backward";

  eager_test::CompareGradTensorWithValue<float>(
      tensor, is_remove_gradient_hook ? 0.25 : 0.25 + 3.0);

  VLOG(6) << "Checking ReduceHook results";
  for (int i = 0; i < tensor.numel(); i++) {
    CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
                 ->data<float>()[i],
             static_cast<float>(100.0f));
  }
  VLOG(6) << "After Tests";
}

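// Same hook scenario for elementwise_add: dOut/dY is all ones, so every
// element of Y's gradient is 1.0, plus 3.0 from hook_function unless the
// gradient hook was removed before Backward.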
void test_elementwiseAdd(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
  paddle::experimental::Tensor X =
      egr_utils_api::CreateTensorWithValue(ddimX,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           3.0,
                                           true);
  egr_utils_api::RetainGradForTensor(X);

  paddle::framework::DDim ddimY = phi::make_ddim({4, 16});
  paddle::experimental::Tensor Y =
      egr_utils_api::CreateTensorWithValue(ddimY,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           2.0,
                                           true);

  auto reduce_hook = [&]() -> void {
    auto* t_ptr =
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  egr_utils_api::RetainGradForTensor(Y);
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));

  auto output_tensor = elementwise_add_dygraph_function(X, Y, {});

  eager_test::CompareTensorWithValue<float>(output_tensor, 5);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  Backward(target_tensors, {});

  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 1.0 : 1.0 + 3.0);

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
  }
}

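// Matmul variant: X is a {4, 16} tensor of 3.0 and Y a {16, 20} tensor of
// 2.0, so every element of X's gradient is 2.0 * 20 and every element of
// Y's is 3.0 * 4 (plus 3.0 from hook_function when the hook is kept).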
void test_matmul(bool is_remove_gradient_hook) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
  paddle::experimental::Tensor X =
      egr_utils_api::CreateTensorWithValue(ddimX,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           3.0,
                                           true);
  egr_utils_api::RetainGradForTensor(X);

  paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
  paddle::experimental::Tensor Y =
      egr_utils_api::CreateTensorWithValue(ddimY,
                                           paddle::platform::CPUPlace(),
                                           phi::DataType::FLOAT32,
                                           phi::DataLayout::NCHW,
                                           2.0,
                                           true);

  auto reduce_hook = [&](void) -> void {
    auto* t_ptr =
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

  egr_utils_api::RetainGradForTensor(Y);
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));

  auto output_tensor = matmul_v2_dygraph_function(
      X, Y, {{"trans_x", false}, {"trans_y", false}});

  eager_test::CompareTensorWithValue<float>(output_tensor, 96);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

  Backward(target_tensors, {});

  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 3.0 * 4 : 3.0 * 4 + 3);

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
  }
}

TEST(Hook_intermidiate, Sigmoid) {
  // True or false represents whether to call RemoveGradientHook
  test_sigmoid(true);
  test_sigmoid(false);
}

TEST(Hook_intermidiate, ElementwiseAdd) {
  test_elementwiseAdd(true);
  test_elementwiseAdd(false);
}

TEST(Hook_intermidiate, Matmul_v2) {
  test_matmul(true);
  test_matmul(false);
}
}  // namespace egr

USE_OP_ITSELF(sigmoid);
USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(matmul_v2);