// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * We still need TensorWrapper; it is designed to copy a tensor
 * in autograd mode.
 *
 * In autograd usage we need to pass autograd_meta to the backward
 * computation, but adding too many autograd-related methods to the
 * tensor interface is not a good choice.
 *
 * TensorWrapper keeps the autograd info needed by backward, but only
 * for input variables; for an output variable it copies the autograd
 * info without the grad node.
 **/
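
// A minimal usage sketch (illustrative only; the tensor `x` and the
// surrounding forward/backward code are hypothetical):
//
//   // Forward: capture an input tensor for the backward pass.
//   TensorWrapper saved_x(x);
//
//   // Capture only the meta (dims/dtype) when backward does not need
//   // the data buffer itself.
//   TensorWrapper saved_meta_only(x, /*no_need_buffer=*/true);
//
//   // Backward: rebuild a usable tensor; recover() re-attaches the
//   // grad node and verifies the inplace version snapshot.
//   paddle::experimental::Tensor x_restored = saved_x.recover();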

#pragma once
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#ifndef PADDLE_NO_PYTHON
#include "paddle/fluid/eager/saved_tensors_hooks.h"
#endif

namespace egr {
class TensorWrapper {
 public:
  TensorWrapper() = default;
  explicit TensorWrapper(const paddle::experimental::Tensor& tensor,
                         bool no_need_buffer = false) {
    // set inplace_version_snapshot_ according to tensor's current inplace
    // version.
    if (tensor.impl() && phi::DenseTensor::classof(tensor.impl().get())) {
      phi::DenseTensor* dense_tensor =
          static_cast<phi::DenseTensor*>(tensor.impl().get());
      auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();
      inplace_version_snapshot_ = inplace_version_counter.CurrentVersion();
    }

    /**
     * Normally we should only save the data and part of the
     * autograd_meta of the forward tensor, and we should not retain
     * its original grad_node, to avoid recursive and additional
     * dependencies on GradNodeBase.
     **/
    auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
    no_need_buffer_ = no_need_buffer;
    // shallow copy tensor_impl here
    if (no_need_buffer) {
      if (phi::DenseTensor::classof(tensor.impl().get())) {
        // Only copy the meta: keep dims/dtype/layout while pointing the
        // holder at a zero-size allocation so the forward data buffer
        // can be freed.
        phi::DenseTensor* dense_tensor =
            static_cast<phi::DenseTensor*>(tensor.impl().get());
        // TODO(jiabin): It's not a good idea to set memory size to zero, find
        // another way and change this.
        intermidiate_tensor_.set_impl(
            std::move(std::make_shared<phi::DenseTensor>(
                std::make_shared<phi::Allocation>(nullptr, 0, tensor.place()),
                std::move(dense_tensor->meta()))));
      } else {
        PADDLE_THROW(paddle::platform::errors::Fatal(
            "Unrecognized tensor type for no_need_buffer feature"));
      }
    } else {
#ifndef PADDLE_NO_PYTHON
      if (SavedTensorsHooks::GetInstance().IsEnable() &&
          tensor.is_dense_tensor() && tensor.initialized()) {
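        // With saved-tensors hooks enabled, keep only the tensor meta
        // locally (the holder is a zero-size placeholder) and hand the
        // real tensor to the Python pack hook; recover() later calls
        // the matching unpack hook to restore the buffer.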
        phi::DenseTensor* dense_tensor =
            static_cast<phi::DenseTensor*>(tensor.impl().get());
        intermidiate_tensor_.set_impl(
            std::move(std::make_shared<phi::DenseTensor>(
                std::make_shared<phi::Allocation>(nullptr, 0, tensor.place()),
                dense_tensor->meta())));
        auto pack_hook = SavedTensorsHooks::GetInstance().GetPackHook();
        unpack_hook_ = SavedTensorsHooks::GetInstance().GetUnPackHook();
        packed_value_ = reinterpret_cast<PyObject*>((*pack_hook)(tensor));
      } else {
#endif
        intermidiate_tensor_.set_impl(tensor.impl());
#ifndef PADDLE_NO_PYTHON
      }
#endif
    }

    if (VLOG_IS_ON(7)) {
      // TODO(jiabin): This may have severe performance issues.
      intermidiate_tensor_.set_name(tensor.name() + "@Saved");
    }

    if (tensor_autograd_meta) {
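      // Copy the autograd meta but drop its grad node to avoid a
      // recursive dependency on GradNodeBase; keep only a weak
      // reference so recover() can re-attach the node if it is still
      // alive.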
      auto autograd_meta =
          std::make_shared<AutogradMeta>(*tensor_autograd_meta);
      autograd_meta->ResetGradNode();
      intermidiate_tensor_.set_autograd_meta(autograd_meta);
      weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode();
    }
  }
#ifndef PADDLE_NO_PYTHON
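  // Copy construction and assignment share ownership of the packed
  // Python object, so its reference count is bumped on every copy and
  // released in the destructor.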
  TensorWrapper(const TensorWrapper& other) {
    no_need_buffer_ = other.no_need_buffer_;
    intermidiate_tensor_ = other.intermidiate_tensor_;
    weak_grad_node_ = other.weak_grad_node_;
    inplace_version_snapshot_ = other.inplace_version_snapshot_;
    packed_value_ = other.packed_value_;
    unpack_hook_ = other.unpack_hook_;
    Py_XINCREF(packed_value_);
  }

  TensorWrapper& operator=(const TensorWrapper& other) {
    no_need_buffer_ = other.no_need_buffer_;
    intermidiate_tensor_ = other.intermidiate_tensor_;
    weak_grad_node_ = other.weak_grad_node_;
    inplace_version_snapshot_ = other.inplace_version_snapshot_;
    // Take the new reference before releasing the old one so that
    // self-assignment stays safe and the previously held packed value
    // is not leaked.
    Py_XINCREF(other.packed_value_);
    Py_XDECREF(packed_value_);
    packed_value_ = other.packed_value_;
    unpack_hook_ = other.unpack_hook_;
    return *this;
  }

  ~TensorWrapper() { Py_XDECREF(packed_value_); }
#endif
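  // Rebuild a usable Tensor from the saved state: restore the data
  // holder (through the unpack hook if the tensor was packed), check
  // the inplace version, and re-attach the grad node captured at
  // construction time.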
  paddle::experimental::Tensor recover() {
    VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name()
            << " for wrapper";
    if (!intermidiate_tensor_.defined()) {
      VLOG(6) << "Return NULL tensor Here. ";
      return paddle::experimental::Tensor();
    }
#ifndef PADDLE_NO_PYTHON
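    // If the buffer was handed to a Python pack hook at construction
    // time, call the matching unpack hook and move the returned buffer
    // back into our placeholder impl.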
    if (packed_value_ && unpack_hook_) {
      auto tensor_unpacked =
          (*unpack_hook_)(reinterpret_cast<void*>(packed_value_));
      auto src_dense_tensor =
          static_cast<phi::DenseTensor*>(tensor_unpacked.impl().get());
      static_cast<phi::DenseTensor*>(intermidiate_tensor_.impl().get())
          ->ResetHolder(src_dense_tensor->MoveMemoryHolder());
    } else {
#endif
      check_inplace_version();
#ifndef PADDLE_NO_PYTHON
    }
#endif

    paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;

    std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
    if (new_grad_node) {
      VLOG(7) << "Recovered TensorWrapper with GradNode "
              << new_grad_node->name() << " addr: " << new_grad_node.get();
    } else {
      VLOG(7) << "Recovered TensorWrapper with Empty GradNode";
    }
    auto* intermediate_autograd_meta =
        EagerUtils::nullable_autograd_meta(intermidiate_tensor_);

    if (intermediate_autograd_meta) {
      auto p_ab_autograd_meta =
          std::make_shared<AutogradMeta>(*intermediate_autograd_meta);
      if (new_grad_node) {
        p_ab_autograd_meta->SetGradNode(new_grad_node);
      }
      recovered_tensor.set_autograd_meta(p_ab_autograd_meta);
    }

    return recovered_tensor;
  }

  paddle::experimental::Tensor get_intermidiate_tensor() {
    return intermidiate_tensor_;
  }

  void clear() { intermidiate_tensor_.reset(); }

 private:
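  // Guard against in-place modification between capture and backward:
  // the snapshot taken at construction must still match the tensor's
  // current inplace version.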
  void check_inplace_version() {
    if (no_need_buffer_) {
      VLOG(7) << "There's no need to check inplace_version because "
                 "no_need_buffer_ is true.";
      return;
    }
    if (intermidiate_tensor_.impl() &&
        phi::DenseTensor::classof(intermidiate_tensor_.impl().get())) {
      phi::DenseTensor* dense_tensor =
          static_cast<phi::DenseTensor*>(intermidiate_tensor_.impl().get());
      auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();

      uint32_t wrapper_version_snapshot = inplace_version_snapshot_;
      uint32_t tensor_version = inplace_version_counter.CurrentVersion();
      PADDLE_ENFORCE_EQ(
          tensor_version,
          wrapper_version_snapshot,
          paddle::platform::errors::PermissionDenied(
              "Tensor '%s' used in gradient computation has been "
              "modified by an inplace operation. "
              "Its version is %d but the expected version is %d. "
              "Please fix your code to avoid calling an inplace operator "
              "after using the Tensor that will be used in gradient "
              "computation.",
              intermidiate_tensor_.name(),
              tensor_version,
              wrapper_version_snapshot));
      VLOG(7) << " The wrapper_version_snapshot of Tensor '"
              << intermidiate_tensor_.name() << "' is [ "
              << wrapper_version_snapshot << " ]";
      VLOG(7) << " The tensor_version of Tensor '"
              << intermidiate_tensor_.name() << "' is [ " << tensor_version
              << " ]";
    }
  }

 private:
  bool no_need_buffer_ = false;
  paddle::experimental::Tensor intermidiate_tensor_;
  std::weak_ptr<egr::GradNodeBase> weak_grad_node_;
  uint32_t inplace_version_snapshot_ = 0;
#ifndef PADDLE_NO_PYTHON
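  // Owned reference to the pack hook's result, plus the matching
  // unpack hook used by recover().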
  PyObject* packed_value_{nullptr};
  std::shared_ptr<UnPackHookBase> unpack_hook_;
#else
  void* packed_value_{nullptr};
  std::shared_ptr<void> unpack_hook_;
#endif
};
}  // namespace egr