hook_test_intermidiate.cc 8.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>
#include <stdexcept>

#include "gtest/gtest.h"

#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/core/dense_tensor.h"

#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/core/kernel_registry.h"
29

30 31 32 33 34 35
// CPU kernels declared for use by this test binary.
// NOTE(review): presumably these declarations make sure the corresponding phi
// kernel registrations are linked in -- confirm against
// paddle/phi/core/kernel_registry.h.
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT);

36 37 38 39
namespace egr {

// Gradient hook shared by the tests in this file: builds and returns a new
// dense tensor whose elements are the elements of `t` plus 3.
//
// The result copies the dtype, dims and layout of `t` and is allocated on the
// same place. Elements are read and written as float, so `t` is expected to
// hold float data. The input buffer is only read, never written.
//
// Throws std::invalid_argument when `t` is not backed by a phi::DenseTensor.
paddle::experimental::Tensor hook_function(
    const paddle::experimental::Tensor& t) {
  auto t_dense = std::dynamic_pointer_cast<phi::DenseTensor>(t.impl());
  if (t_dense == nullptr) {
    // dynamic_pointer_cast yields an empty pointer for a missing or non-dense
    // impl; report it instead of dereferencing a null pointer below.
    throw std::invalid_argument(
        "hook_function expects a tensor backed by phi::DenseTensor");
  }

  auto ret_meta = phi::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
                                       t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<phi::DenseTensor>(
      phi::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes_size)),
      std::move(ret_meta));

  // The input is only read, so use the plain accessor rather than
  // mutable_data(), which asks for a writable buffer.
  const float* t_ptr = t_dense->data<float>();
  float* ret_ptr = ret_dense->mutable_data<float>(place);

  // Use a 64-bit counter so it cannot be narrower than the element count.
  const int64_t numel = ret_dense->numel();
  for (int64_t i = 0; i < numel; ++i) {
    ret_ptr[i] = t_ptr[i] + 3.0f;
  }

  auto ret_impl = std::dynamic_pointer_cast<phi::TensorBase>(ret_dense);
  paddle::experimental::Tensor ret = paddle::experimental::Tensor();
  ret.set_impl(ret_impl);

  return ret;
}

64
void test_sigmoid(bool is_remove_gradient_hook) {
65 66 67 68 69
  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make Dim";
70
  paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
71 72 73

  VLOG(6) << "Make paddle::experimental::Tensor";
  paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue(
74 75
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 0.0, true);
76

77 78
  VLOG(6) << "Make ReduceHook function";
  auto reduce_hook = [&](void) -> void {
79
    auto* t_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
80 81 82 83 84 85
                      ->data<float>();
    for (int i = 0; i < tensor.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

86 87 88 89
  VLOG(6) << "Retain Grad for Tensor";
  egr_utils_api::RetainGradForTensor(tensor);

  VLOG(6) << "Register GradientHook for Tensor";
90 91
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      tensor, std::make_shared<CppTensorHook>(hook_function));
92

93
  VLOG(6) << "Register ReduceHook for Tensor";
94 95
  egr_utils_api::RegisterReduceHookForTensor(
      tensor, std::make_shared<CppTensorVoidHook>(reduce_hook));
96

97 98 99 100 101 102 103 104
  VLOG(6) << "Runing Forward";
  auto output_tensor = sigmoid_dygraph_function(tensor, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);

  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

105 106 107 108 109
  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(tensor);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

110
  VLOG(6) << "Runing Backward";
111
  Backward(target_tensors, {});
112 113
  VLOG(6) << "Finish Backward";

114 115
  eager_test::CompareGradTensorWithValue<float>(
      tensor, is_remove_gradient_hook ? 0.25 : 0.25 + 3.0);
116 117 118

  VLOG(6) << "Checking ReduceHook results";
  for (int i = 0; i < tensor.numel(); i++) {
119
    CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
120 121 122
                 ->data<float>()[i],
             static_cast<float>(100.0f));
  }
123 124 125
  VLOG(6) << "After Tests";
}

126
void test_elementwiseAdd(bool is_remove_gradient_hook) {
127 128 129 130 131 132 133
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
134
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
135
  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
136 137
      ddimX, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 3.0, true);
138 139
  egr_utils_api::RetainGradForTensor(X);

140
  paddle::framework::DDim ddimY = phi::make_ddim({4, 16});
141
  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
142 143
      ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 2.0, true);
144

145
  auto reduce_hook = [&]() -> void {
146
    auto* t_ptr =
147
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
148 149 150 151 152
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

153
  egr_utils_api::RetainGradForTensor(Y);
154 155 156 157
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
158 159 160 161 162

  auto output_tensor = elementwise_add_dygraph_function(X, Y, {});

  eager_test::CompareTensorWithValue<float>(output_tensor, 5);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
163 164 165 166 167 168

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

169
  Backward(target_tensors, {});
170 171

  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
172 173
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 1.0 : 1.0 + 3.0);
174 175 176

  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
177 178 179
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
180
  }
181 182
}

183
void test_matmul(bool is_remove_gradient_hook) {
184 185 186 187 188 189 190
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);

  // 1. Prepare Input
191
  paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
192
  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
193 194
      ddimX, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 3.0, true);
195 196
  egr_utils_api::RetainGradForTensor(X);

197
  paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
198
  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
199 200
      ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 2.0, true);
201

202 203
  auto reduce_hook = [&](void) -> void {
    auto* t_ptr =
204
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
205 206 207 208 209
    for (int i = 0; i < Y.numel(); i++) {
      t_ptr[i] = 100.0;  // set to 100.0
    }
  };

210
  egr_utils_api::RetainGradForTensor(Y);
211 212 213 214
  int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
      Y, std::make_shared<CppTensorHook>(hook_function));
  egr_utils_api::RegisterReduceHookForTensor(
      Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
215 216 217 218 219 220

  auto output_tensor = matmul_v2_dygraph_function(
      X, Y, {{"trans_x", false}, {"trans_y", false}});

  eager_test::CompareTensorWithValue<float>(output_tensor, 96);
  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
221 222 223 224 225 226

  if (is_remove_gradient_hook) {
    std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
    grad_node_tmp->RemoveGradientHook(hook_id);
  }

227
  Backward(target_tensors, {});
228 229

  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
230 231
  eager_test::CompareGradTensorWithValue<float>(
      Y, is_remove_gradient_hook ? 3.0 * 4 : 3.0 * 4 + 3);
232

233 234
  // Checking ReduceHook results
  for (int i = 0; i < Y.numel(); i++) {
235 236 237
    CHECK_EQ(
        std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>()[i],
        static_cast<float>(100.0f));
238 239
  }
}
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255

// Runs the sigmoid scenario twice: first with the gradient hook removed via
// RemoveGradientHook before Backward, then with it left registered.
TEST(Hook_intermidiate, Sigmoid) {
  for (const bool remove_gradient_hook : {true, false}) {
    test_sigmoid(remove_gradient_hook);
  }
}

// Runs the elementwise_add scenario twice: first with the gradient hook
// removed before Backward, then with it left registered.
TEST(Hook_intermidiate, ElementwiseAdd) {
  for (const bool remove_gradient_hook : {true, false}) {
    test_elementwiseAdd(remove_gradient_hook);
  }
}

// Runs the matmul_v2 scenario twice: first with the gradient hook removed
// before Backward, then with it left registered.
TEST(Hook_intermidiate, Matmul_v2) {
  for (const bool remove_gradient_hook : {true, false}) {
    test_matmul(remove_gradient_hook);
  }
}
256 257 258 259 260
}  // namespace egr

// Operators behind the sigmoid_dygraph_function,
// elementwise_add_dygraph_function and matmul_v2_dygraph_function calls made
// by the tests in this file.
// NOTE(review): USE_OP / USE_OP_ITSELF presumably pull the operator
// registrations into this binary -- confirm against the op registry header.
USE_OP(sigmoid);
USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(matmul_v2);