// hook_test_intermidiate.cc
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cstdint>
#include <memory>
#include <sstream>
#include <vector>

#include "gtest/gtest.h"

#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/dense_tensor.h"

#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/core/kernel_registry.h"

// CPU kernels referenced by the tests in this file.
// NOTE(review): presumably these declarations keep the statically registered
// kernels linked into the test binary -- confirm against kernel_registry.h.
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid_grad, CPU, ALL_LAYOUT);

namespace egr {

paddle::experimental::Tensor hook_function(
    const paddle::experimental::Tensor& t) {
42
  auto t_dense = std::dynamic_pointer_cast<phi::DenseTensor>(t.impl());
43

44 45
  auto ret_meta = phi::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
                                       t_dense->layout());
46
  auto place = t_dense->place();
47 48 49
  size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<phi::DenseTensor>(
      phi::make_intrusive<paddle::experimental::SharedStorage>(
50 51 52 53 54 55 56 57 58
          paddle::memory::Alloc(place, bytes_size)),
      std::move(ret_meta));

  float* t_ptr = t_dense->mutable_data<float>(place);
  float* ret_ptr = ret_dense->mutable_data<float>(place);
  for (int i = 0; i < ret_dense->numel(); i++) {
    ret_ptr[i] = t_ptr[i] + 3.0;
  }

59
  auto ret_impl = std::dynamic_pointer_cast<phi::TensorBase>(ret_dense);
60 61 62 63 64 65
  paddle::experimental::Tensor ret = paddle::experimental::Tensor();
  ret.set_impl(ret_impl);

  return ret;
}

66
void test_sigmoid(bool is_remove_gradient_hook) {
67 68 69 70 71
  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make Dim";
72
  paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
73 74 75

  VLOG(6) << "Make paddle::experimental::Tensor";
  paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue(
76 77
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 0.0, true);
78

79 80
  VLOG(6) << "Make ReduceHook function";
  auto reduce_hook = [&](void) -> void {
81
    auto* t_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
82 83 84 85 86 87
                      ->data<float>();
    for (int i = 0; i < tensor.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

88 89 90 91
  VLOG(6) << "Retain Grad for Tensor";
  egr_utils_api::RetainGradForTensor(tensor);

  VLOG(6) << "Register GradientHook for Tensor";
92 93
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      tensor, std::make_shared<CppTensorHook>(hook_function));
94

95
  VLOG(6) << "Register ReduceHook for Tensor";
96 97
  egr_utils_api::RegisterReduceHookForTensor(
      tensor, std::make_shared<CppTensorVoidHook>(reduce_hook));
98

99 100 101 102 103 104 105 106
  VLOG(6) << "Runing Forward";
  auto output_tensor = sigmoid_dygraph_function(tensor, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);

  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

107 108 109 110 111
  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(tensor);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

112
  VLOG(6) << "Runing Backward";
113
  Backward(target_tensors, {});
114 115
  VLOG(6) << "Finish Backward";

116 117
  eager_test::CompareGradTensorWithValue<float>(
      tensor, is_remove_gradient_hook ? 0.25 : 0.25 + 3.0);
118 119 120

  VLOG(6) << "Checking ReduceHook results";
  for (int i = 0; i < tensor.numel(); i++) {
121
    CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
122 123 124
                 ->data<float>()[i],
             static_cast<float>(100.0f));
  }
125 126 127
  VLOG(6) << "After Tests";
}

128
void test_elementwiseAdd(bool is_remove_gradient_hook) {
129 130 131 132 133 134 135
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
136
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
137
  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
138 139
      ddimX, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 3.0, true);
140 141
  egr_utils_api::RetainGradForTensor(X);

142
  paddle::framework::DDim ddimY = phi::make_ddim({4, 16});
143
  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
144 145
      ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 2.0, true);
146

147
  auto reduce_hook = [&]() -> void {
148
    auto* t_ptr =
149
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
150 151 152 153 154
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

155
  egr_utils_api::RetainGradForTensor(Y);
156 157 158 159
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
160 161 162 163 164

  auto output_tensor = elementwise_add_dygraph_function(X, Y, {});

  eager_test::CompareTensorWithValue<float>(output_tensor, 5);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
165 166 167 168 169 170

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

171
  Backward(target_tensors, {});
172 173

  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
174 175
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 1.0 : 1.0 + 3.0);
176 177 178

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
179 180 181
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
182
  }
183 184
}

185
void test_matmul(bool is_remove_gradient_hook) {
186 187 188 189 190 191 192
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
193
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
194
  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
195 196
      ddimX, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 3.0, true);
197 198
  egr_utils_api::RetainGradForTensor(X);

199
  paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
200
  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
201 202
      ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 2.0, true);
203

204 205
  auto reduce_hook = [&](void) -> void {
    auto* t_ptr =
206
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
207 208 209 210 211
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

212
  egr_utils_api::RetainGradForTensor(Y);
213 214 215 216
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
217 218 219 220 221 222

  auto output_tensor = matmul_v2_dygraph_function(
      X, Y, {{"trans_x", false}, {"trans_y", false}});

  eager_test::CompareTensorWithValue<float>(output_tensor, 96);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
223 224 225 226 227 228

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

229
  Backward(target_tensors, {});
230 231

  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
232 233
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 3.0 * 4 : 3.0 * 4 + 3);
234

235 236
  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
237 238 239
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
240 241
  }
}
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257

TEST(Hook_intermidiate, Sigmoid) {
  // True or false represents whether to call RemoveGradientHook
  test_sigmoid(true);
  test_sigmoid(false);
}

TEST(Hook_intermidiate, ElementwiseAdd) {
  test_elementwiseAdd(true);
  test_elementwiseAdd(false);
}

TEST(Hook_intermidiate, Matmul_v2) {
  test_matmul(true);
  test_matmul(false);
}
258 259
}  // namespace egr

// Operators exercised by the tests above.
// NOTE(review): presumably these keep the operator registrations linked into
// the test binary -- confirm against the USE_OP_ITSELF definition.
USE_OP_ITSELF(sigmoid);
USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(matmul_v2);