variable_wrapper.h 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

17
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/hooks.h"
#include "paddle/fluid/imperative/op_base.h"
27 28 29 30

namespace paddle {
namespace imperative {

31 32
class VariableWrapperHook;
class InplaceVariableWrapperHook;
33 34 35
class VarBase;
class GradOpNode;

36 37
class VariableWrapper {
 public:
38 39
  friend class VarBase;

40 41
  // Creates a wrapper that only carries a name; the wrapped variable is
  // default-constructed.
  explicit VariableWrapper(const std::string& name)
      : name_(name) {}

42 43 44
  // Creates a wrapper called `name` whose wrapped variable is copy-constructed
  // from `variable`.
  VariableWrapper(const std::string& name,
                  const framework::Variable& variable)
      : var_(variable),
        name_(name) {}

45 46
  // Logs the wrapper's name at verbosity level 10 on destruction.
  ~VariableWrapper() {
    VLOG(10) << "Destruct VariableWrapper: " << name_;
  }

47 48 49 50 51 52 53
  // Read-only access to the wrapped variable.
  const framework::Variable& Var() const {
    return var_;
  }

  // Returns the address of the wrapped variable so that callers can modify it.
  framework::Variable* MutableVar() {
    return &var_;
  }

  // This is used for python api
  void SetOverridedStopGradient(bool stop_gradient) {
    overrided_stop_gradient_ = static_cast<int>(stop_gradient);
54 55 56 57

    if (auto grad_var = grad_var_.lock()) {
      grad_var->SetOverridedStopGradient(stop_gradient);
    }
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
  }

  // This is used for python api.
  // Every non-zero state counts as true, so the unset state (-1) also reports
  // true.
  bool OverridedStopGradient() const {
    return overrided_stop_gradient_ != 0;
  }

  // This is used inside C++.
  // Exposes the raw tri-state flag: (-1) unset, (1) true, (0) false.
  int InnerOverridedStopGradient() const {
    return overrided_stop_gradient_;
  }

  // This is used inside C++
  void InnerSetOverridedStopGradient(bool stop_gradient) {
    if (overrided_stop_gradient_ == -1) {
      overrided_stop_gradient_ = static_cast<int>(stop_gradient);
    } else {
      VLOG(6) << "Ignore Stop gradient conversion for Var: " << Name()
              << "Set value is: " << overrided_stop_gradient_;
    }
74 75 76 77

    if (auto grad_var = grad_var_.lock()) {
      grad_var->InnerSetOverridedStopGradient(stop_gradient);
    }
78 79
  }

80 81 82 83 84 85 86 87 88 89 90
  // Returns true when stop-gradient is reported for this variable, or when a
  // live grad var exists that has no grad node attached.
  bool IsLeaf() const {
    if (OverridedStopGradient()) {
      return true;
    }
    // Lock the weak reference once and test the result, instead of checking
    // expired() and then dereferencing a second, unchecked lock().
    const auto grad_var = GetGradVar();
    return grad_var != nullptr && !grad_var->HasGradNode();
  }

  bool IsLeafGrad() const {
91
    if (!HasGradNode() && !OverridedStopGradient()) {
92 93 94 95 96
      return true;
    }
    return false;
  }

97 98 99 100
  // Stores the persistable flag.
  void SetPersistable(bool persistable) {
    persistable_ = persistable;
  }

  // Returns the persistable flag (false unless SetPersistable changed it).
  bool Persistable() const {
    return persistable_;
  }

101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
  bool IsEmpty() const {
    bool is_empty = true;
    if (var_.IsInitialized()) {
      const framework::Tensor* tensor = nullptr;
      if (var_.IsType<framework::LoDTensor>()) {
        tensor = &(var_.Get<framework::LoDTensor>());
      } else if (var_.IsType<framework::SelectedRows>()) {
        tensor = &(var_.Get<framework::SelectedRows>().value());
      } else {
        PADDLE_THROW(platform::errors::PermissionDenied(
            "Only support LoDTensor and SelectedRows for gradient var"));
      }
      if (tensor && tensor->IsInitialized()) {
        is_empty = false;
      }
    }
    return is_empty || is_empty_;
  }

  // TODO(zhouwei): once the Tensor.clear_gradient() bug is fixed, SetIsEmpty()
  // will no longer be needed.
  // Stores the flag that forces IsEmpty() to report true.
  void SetIsEmpty(bool is_empty) {
    is_empty_ = is_empty;
  }

124 125 126 127 128 129 130 131
  // Returns the name of this wrapper.
  const std::string& Name() const {
    return name_;
  }

  // Replaces the name of this wrapper.
  void SetName(const std::string& name) {
    name_ = name;
  }

  // Stores the variable type that DataType() and Place() use to pick the
  // concrete type to read from the wrapped variable.
  void SetType(framework::proto::VarType::Type type) {
    type_ = type;
  }

  // Returns the stored variable type (LOD_TENSOR unless SetType changed it).
  framework::proto::VarType::Type Type() const {
    return type_;
  }

132 133 134 135 136 137 138 139 140 141 142 143
  // Returns the grad var if it is still alive, otherwise an empty pointer.
  std::shared_ptr<VariableWrapper> GetGradVar() const {
    auto alive_grad_var = grad_var_.lock();
    return alive_grad_var;
  }

  // Returns the weak reference to the grad var without locking it.
  const std::weak_ptr<VariableWrapper>& GetWeakGradVar() const { return grad_var_; }

  // Returns the grad node if it is still alive, otherwise an empty pointer.
  std::shared_ptr<GradOpNode> GetGradNode() const {
    return grad_node_.lock();
  }

  // True while the weakly referenced grad node has not expired.
  bool HasGradNode() const {
    return !grad_node_.expired();
  }

144 145
  // True while the weakly referenced grad var has not expired.
  bool HasGradVar() const {
    return !grad_var_.expired();
  }

146 147 148 149
  // Stores the fallback data type that DataType() returns when it cannot read
  // one from an initialized tensor.
  void SetDataType(framework::proto::VarType::Type data_type) { data_type_ = data_type; }

150 151 152 153 154 155 156
  // Returns the data type of the underlying tensor when one is available,
  // otherwise the stored fallback data_type_.
  // The concrete type read from the wrapped variable is chosen by type_:
  // LOD_TENSOR and SELECTED_ROWS yield the tensor's own type once that tensor
  // is initialized; VOCAB has its own handling (see below); any other type_
  // falls back to data_type_.
  framework::proto::VarType::Type DataType() const {
    if (!var_.IsInitialized()) {
      VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
      return data_type_;
    }

    const framework::Tensor* tensor = nullptr;
    if (type_ == framework::proto::VarType::LOD_TENSOR) {
      tensor = &(var_.Get<framework::LoDTensor>());
    } else if (type_ == framework::proto::VarType::SELECTED_ROWS) {
      tensor = &(var_.Get<framework::SelectedRows>().value());
    } else if (type_ == framework::proto::VarType::VOCAB) {
      const framework::Vocab& vocab = var_.Get<framework::Vocab>();
      // NOTE(review): a non-empty vocab is logged as "not initialized" and
      // yields data_type_, while an empty one yields VOCAB. This condition
      // looks inverted -- confirm the intended behavior before changing it.
      if (vocab.size() != 0) {
        VLOG(6) << "The tensor of variable " << name_
                << " is not initialized";
        return data_type_;
      }
      return framework::proto::VarType::VOCAB;
    } else {
      VLOG(6) << "Variable " << name_ << " is not initialized";
      return data_type_;
    }

    if (tensor != nullptr && tensor->IsInitialized()) {
      return tensor->type();
    }
    VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
    return data_type_;
  }

179 180 181 182 183 184 185 186
  // Records the data type of the corresponding forward var.
  void SetForwardDataType(framework::proto::VarType::Type data_type) { fwd_data_type_ = data_type; }

  // Returns the recorded forward data type; the value is
  // static_cast<Type>(-1) until SetForwardDataType is called.
  framework::proto::VarType::Type ForwardDataType() const { return fwd_data_type_; }

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
  // Returns the place of the underlying tensor when one is available,
  // otherwise CPUPlace.
  // Only LOD_TENSOR and SELECTED_ROWS variables are inspected; every other
  // type_, an uninitialized variable, or an uninitialized tensor yields the
  // default CPUPlace.
  const platform::Place Place() const {
    // Default place for var not initialized.
    auto default_place = platform::CPUPlace();

    if (!var_.IsInitialized()) {
      VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
      return default_place;
    }

    const framework::Tensor* tensor = nullptr;
    if (type_ == framework::proto::VarType::LOD_TENSOR) {
      tensor = &(var_.Get<framework::LoDTensor>());
    } else if (type_ == framework::proto::VarType::SELECTED_ROWS) {
      tensor = &(var_.Get<framework::SelectedRows>().value());
    } else {
      VLOG(6) << "Variable " << name_ << " is not initialized";
      return default_place;
    }

    if (tensor != nullptr && tensor->IsInitialized()) {
      return tensor->place();
    }
    VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
    return default_place;
  }

209 210 211 212 213 214 215 216 217 218 219
  // Returns the inplace version recorded by the last ResetInplaceVersion()
  // call (0 if it was never called).
  uint32_t InplaceVersionSnapshot() const {
    return inplace_version_snapshot_;
  }

  // Refreshes the snapshot with the value currently reported by
  // var_.CurrentInplaceVersion(), logging the old and the new value.
  void ResetInplaceVersion() {
    auto new_version = var_.CurrentInplaceVersion();

    // Keep a space on both sides of "to" so the two numbers do not run
    // together in the log.
    VLOG(6) << "The wrapper version of VariableWrapper '" << name_
            << "' will be updated from " << inplace_version_snapshot_
            << " to " << new_version;
    inplace_version_snapshot_ = new_version;
  }

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
  // Returns true if the cache holds an entry for `key`.
  bool hasCacheKey(const paddle::framework::OpKernelType& key) {
    return var_cache.count(key) != 0;
  }

  // Returns the cached wrapper stored for `key`, or an empty pointer when the
  // cache has no entry for it.
  // The lookup uses find() rather than operator[], so a miss no longer
  // inserts a null entry that would make hasCacheKey() report true afterwards.
  std::shared_ptr<VariableWrapper> getCacheValue(
      const paddle::framework::OpKernelType& key) {
    const auto it = var_cache.find(key);
    if (it == var_cache.end()) {
      return nullptr;
    }
    return it->second;
  }

  // Stores `val` in the cache under `key`, replacing any previous entry.
  // `val` is taken by value, so it is moved into the map instead of being
  // copied a second time.
  void setCacheValue(const paddle::framework::OpKernelType& key,
                     std::shared_ptr<VariableWrapper> val) {
    var_cache[key] = std::move(val);
  }

235
  /* Hook related methods */
236
  // True when at least one VariableWrapper hook is registered.
  bool HasVariableWrapperHook() const {
    return !var_hooks_.empty();
  }
237

238 239
  // Registers `hook` under the next free id and returns that id, which can
  // later be passed to RemoveVariableWrapperHook(). Ids increase by one per
  // registration.
  int64_t AddVariableWrapperHook(std::shared_ptr<VariableWrapperHook>&& hook) {
    const int64_t assigned_id = next_hook_id_;
    var_hooks_.emplace(assigned_id, std::move(hook));
    ++next_hook_id_;
    return assigned_id;
  }

243 244
  // Unregisters the hook stored under `hook_id`.
  // Returns true if a hook was removed, false if no hook had that id.
  bool RemoveVariableWrapperHook(const int64_t& hook_id) {
    return var_hooks_.erase(hook_id) != 0;
  }

251 252 253
  // Read-only view of all registered VariableWrapper hooks, keyed by hook id.
  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>&
  GetVariableWrapperHooks() const { return var_hooks_; }

256 257 258 259
  // True when at least one void hook is registered.
  bool HasVoidHook() const {
    return !void_hooks_.empty();
  }

  // Appends `hook` to the list of void hooks, taking over the passed pointer.
  void AddVoidHook(std::shared_ptr<std::function<void()>>&& hook) {
    void_hooks_.push_back(std::move(hook));
  }

262 263 264
  // Read-only view of the registered void hooks in registration order.
  const std::vector<std::shared_ptr<std::function<void()>>>& GetVoidHooks() const {
    return void_hooks_;
  }

267 268 269 270
 private:
  void SetGradVar(const std::shared_ptr<VariableWrapper>& var) {
    auto shared_var = grad_var_.lock();
    if (shared_var != var) {
271 272 273 274
      PADDLE_ENFORCE_EQ(
          shared_var, nullptr,
          platform::errors::PermissionDenied(
              "Cannot set gradient variable wrapper twice for %s", name_));
275 276 277 278 279 280 281 282 283 284 285 286
      grad_var_ = var;
    }
  }

  void SetGradNode(const std::shared_ptr<GradOpNode>& grad_node) {
    if (!grad_node) {
      grad_node_.reset();
      return;
    }

    auto shared_node = grad_node_.lock();
    if (shared_node != grad_node) {
287 288 289 290 291 292 293
      if (grad_node->InplaceGradNameMap().empty()) {
        // grad_node doesn't have Inplace message
        PADDLE_ENFORCE_EQ(
            shared_node, nullptr,
            platform::errors::PermissionDenied(
                "Cannot set gradient op twice unless using Inplace Strategy."));
      } else if (shared_node) {
294 295 296
        VLOG(3) << "The gradient op of Var (" << Name()
                << ") has been set twice. Because Inplace Strategy is used.";
      }
297 298 299 300
      grad_node_ = grad_node;
    }
  }

301 302 303 304
 private:
  framework::Variable var_;
  std::string name_;

305 306 307 308
  // Used for cache the dtype promotioned variableWrapper in real and complex
  // compute of Paddle Quantum
  std::map<paddle::framework::OpKernelType, std::shared_ptr<VariableWrapper>>
      var_cache;
309 310 311 312 313
  // add this property for users may set stop_gradient themselves and this
  // should override the frameworks setting (-1) unset, (1) true, (0) false
  int overrided_stop_gradient_{-1};
  bool persistable_{false};

314 315 316 317
  // Used for checking whether there is any inplace operation affecting gradient
  // calculation.
  uint32_t inplace_version_snapshot_{0};

318 319
  framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};
  framework::proto::VarType::Type data_type_{framework::proto::VarType::FP32};
320

321 322 323 324 325 326 327
  // See [ Why need handle complex gradient to real gradient? ]
  // Used for grad var to get the data type of its corresponding forward var,
  // if inconsistent, the data type of grad var needs to be casted to be
  // consistent with forward var
  framework::proto::VarType::Type fwd_data_type_{
      static_cast<framework::proto::VarType::Type>(-1)};

328 329
  std::weak_ptr<VariableWrapper> grad_var_;
  std::weak_ptr<GradOpNode> grad_node_;
330

331 332 333 334
  // TODO(zhouwei): fix bug of Tensor.clear_gradient(), function SetIsEmpty()
  // isn't need
  bool is_empty_{false};

335
  // NOTE(chenweihang): only grad var will hold hooks now
336
  int64_t next_hook_id_{0};
337 338
  // [ Hooks with VariableWrapper as input and output ]
  // NOTE: Now registered for grad var, support adding and removing,
339
  // key is the accumulated int64_t value
340 341 342 343 344 345 346 347
  // NOTE: Var hook need to support removing, so need hook id
  std::map<int64_t, std::shared_ptr<VariableWrapperHook>> var_hooks_;
  // [ Hooks without input and output ]
  // NOTE: Now registered after the execution of the entire backward
  // process is over, currently only used for reducing in distributed
  // training
  // NOTE: Now no need to support remove void hook
  std::vector<std::shared_ptr<std::function<void()>>> void_hooks_;
348 349 350 351
};

}  // namespace imperative
}  // namespace paddle