layer.cc 8.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/layer.h"
#include <algorithm>
#include <cstdint>
#include <deque>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/string/printf.h"

namespace paddle {
namespace imperative {

using framework::Variable;

// Accumulates `src` into `dst` element-wise: dst[i] += src[i].
//
// Both variables are accessed as framework::LoDTensor and treated as flat
// float buffers; the destination buffer is requested on CPUPlace. The two
// tensors must report the same numel(), otherwise PADDLE_ENFORCE fails with
// both counts in the message.
void AddTo(Variable* src, Variable* dst) {
  framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
  framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), "%lld vs %lld",
                 dst_tensor->numel(), src_tensor->numel());
  float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
  const float* src_data = src_tensor->data<float>();
  // numel() is a signed count (it is printed with %lld above). Keep the index
  // signed too, so the bound is never implicitly converted to an unsigned
  // type, and read the bound once instead of on every iteration.
  const int64_t count = src_tensor->numel();
  for (int64_t i = 0; i < count; ++i) {
    dst_data[i] += src_data[i];
  }
}

class Autograd {
 public:
  explicit Autograd(framework::Scope* scope) : scope_(scope) {}

  void RunBackward(VarBase* var) {
    PADDLE_ENFORCE(var->pre_op_->op_desc_);
    // TODO(panyx0718): Only create for vars that "require_grad"
    (*var->pre_op_->output_vars_)[var->pre_op_out_idx_]->grads_ = var->grads_;

    std::deque<OpBase*> ready;
    ready.push_back(var->pre_op_);

    std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->pre_op_);

    while (!ready.empty()) {
      OpBase* ready_op = ready.front();
      ready.pop_front();
      std::vector<Variable*> input_grads = ready_op->ApplyGrad(scope_);

      for (size_t i = 0; i < input_grads.size(); ++i) {
        if (!input_grads[i]) continue;
        OpBase* pre_op = ready_op->pre_ops_->at(i);
        if (!pre_op) continue;

        dep_counts[pre_op] -= 1;
        PADDLE_ENFORCE(dep_counts[pre_op] >= 0);
        bool pre_op_ready = dep_counts[pre_op] == 0;
        if (pre_op_ready) {
          ready.push_back(pre_op);
        }
      }
    }
  }

 private:
  std::map<OpBase*, int> ComputeDepCounts(OpBase* op) {
    std::map<OpBase*, int> ret;

    std::deque<OpBase*> queue;
    queue.push_back(op);
    std::unordered_set<OpBase*> visited;
    visited.insert(op);
    while (!queue.empty()) {
      OpBase* candidate = queue.front();
      queue.pop_front();
      for (OpBase* pre_op : *(candidate->pre_ops_)) {
        if (!pre_op) continue;
        if (visited.find(pre_op) == visited.end()) {
          visited.insert(pre_op);
          queue.push_back(pre_op);
        }
        ret[pre_op] += 1;
      }
    }

    return ret;
  }

  framework::Scope* scope_;
};

// Obtains a variable from `scope` via scope->Var(), sizes its LoDTensor to
// `dim` as float data on CPUPlace, and fills every element with `val`.
//
// name:        base name of the variable.
// dim:         dimensions of the tensor to allocate.
// val:         value written to every element.
// scope:       scope the variable is requested from.
// random_name: when true, "@<id>" is appended to `name`, where <id> is a
//              random integer in [1, INT_MAX].
//              NOTE(review): the suffix is random, not guaranteed unique, so
//              two calls could in principle produce the same name.
//
// Returns the variable obtained from `scope`.
framework::Variable* CreateVariable(const std::string& name,
                                    const framework::DDim& dim, float val,
                                    framework::Scope* scope,
                                    bool random_name = true) {
  std::string varname = name;
  if (random_name) {
    std::mt19937 rng;
    rng.seed(std::random_device()());
    std::uniform_int_distribution<std::mt19937::result_type> dist6(
        1, std::numeric_limits<int>::max());
    int id = dist6(rng);
    varname = string::Sprintf("%s@%d", varname, id);
  }

  // Creating a variable is routine work, so it is traced at verbose level
  // only; it must not be reported through the error log.
  VLOG(3) << "creating var " << varname;
  framework::Variable* var = scope->Var(varname);
  framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();

  float* data = tensor->mutable_data<float>(dim, platform::CPUPlace());
  std::fill(data, data + tensor->numel(), val);
  return var;
}

// Returns the LoDTensor held by this variable's gradient variable `grads_`.
//
// Elsewhere in this file `grads_` is only assigned during a backward pass
// (and ApplyGrad explicitly handles it being null), so it may not be set when
// this is called. Fail with a descriptive error in that case instead of
// dereferencing a null pointer.
framework::LoDTensor& VarBase::Grad() {
  VLOG(3) << "get var grad " << var_desc_->Name();
  PADDLE_ENFORCE(grads_ != nullptr,
                 "variable %s has no gradient yet, please run backward first",
                 var_desc_->Name());
  return *grads_->GetMutable<framework::LoDTensor>();
}

// Adds `grad` into this variable's accumulated gradient `grads_`.
//
// `grad` must be an initialized variable that holds an initialized LoDTensor;
// both conditions are enforced. If no accumulator exists yet, a zero-filled
// one with the dims of the forward value `var_` is created in `scope` under a
// "<name>@IGrad"-based name before the addition.
void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
  const std::string var_name = var_desc_->Name();

  // The variable itself must be checked before its tensor is accessed.
  PADDLE_ENFORCE(grad->IsInitialized(), "grad %s must be initialized",
                 var_name);

  const framework::LoDTensor& incoming = grad->Get<framework::LoDTensor>();
  PADDLE_ENFORCE(incoming.IsInitialized(),
                 "variable %s has NO gradient, please set stop_gradient to it",
                 var_name);

  VLOG(3) << "apply var grad " << var_name << " " << incoming.data<float>()[0];

  if (grads_ == nullptr) {
    const framework::DDim& dims = var_->Get<framework::LoDTensor>().dims();
    grads_ = CreateVariable(string::Sprintf("%s@IGrad", var_name), dims, 0.0,
                            scope);
  }

  AddTo(grad, grads_);
  VLOG(3) << "grad_ after apply var grad " << var_name << " "
          << grads_->Get<framework::LoDTensor>().data<float>()[0];
}

// Runs this op's gradient op and hands the results to the forward inputs.
//
// Steps:
//   1. For every grad-op input that appears in `grad_to_var_`, make the scope
//      variable share data with the accumulated `grads_` of the forward
//      output of the same name, and record its shape in the var desc.
//   2. Make sure every grad-op output has a var desc in `block_` and a
//      variable in `scope`; uninitialized LOD_TENSOR outputs get a LoDTensor.
//   3. Infer shapes and var types, create the operator and run it on CPUPlace.
//   4. For every forward input, find the grad-op output mapped to it and
//      accumulate it through VarBase::ApplyGrad.
//
// Returns a vector with one entry per element of `input_vars_`, in the same
// order. An entry is nullptr when no gradient was applied to that input (no
// grad output maps to it, or its `stop_gradient_` is set). Returns an empty
// vector when `grad_to_var_` is null.
std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
  VLOG(3) << "op grad " << grad_op_desc_->Type();
  if (!grad_to_var_) {
    return {};
  }

  for (const std::string& grad_invar : grad_op_desc_->InputArgumentNames()) {
    if (grad_to_var_->find(grad_invar) == grad_to_var_->end()) {
      // grad op inputs can be forward inputs, so not in grad_to_var.
      continue;
    }
    VLOG(3) << "op grad input var " << grad_invar;
    framework::VarDesc& grad_invar_desc =
        block_->FindRecursiveOrCreateVar(grad_invar);
    framework::Variable* var = scope->Var(grad_invar);
    const std::string& invar = grad_to_var_->at(grad_invar);
    for (VarBase* varbase : *output_vars_) {
      // Use the accumulated grads_ by sharing the input with grads_.
      if (varbase->var_desc_->Name() == invar) {
        var->GetMutable<framework::LoDTensor>()->ShareDataWith(
            varbase->grads_->Get<framework::LoDTensor>());
        break;
      }
    }
    // Compute the shape once and reuse it for the desc and the trace message.
    const auto shape =
        framework::vectorize(var->Get<framework::LoDTensor>().dims());
    grad_invar_desc.SetShape(shape);
    VLOG(3) << "set op grad var desc's shape size " << shape.size();
  }

  // Dumping the op proto is debugging output, not an error condition, so it
  // is emitted at verbose level rather than through the error log.
  VLOG(3) << "grad_op_desc_" << grad_op_desc_->Proto()->DebugString();

  for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
    VLOG(3) << "op grad output var " << outvar;
    block_->FindRecursiveOrCreateVar(outvar);
    framework::Variable* var = scope->Var(outvar);
    if (!var->IsInitialized()) {
      VLOG(3) << "init op grad output var " << outvar;
      framework::VarDesc* var_desc = block_->FindVar(outvar);
      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
        var->GetMutable<framework::LoDTensor>();
      } else {
        LOG(ERROR) << "tracer doesn't support yet";
      }
    }
    VLOG(3) << "op grad output var " << outvar << " is inited";
  }

  grad_op_desc_->InferShape(*block_);
  grad_op_desc_->InferVarType(block_);
  std::unique_ptr<framework::OperatorBase> opbase =
      framework::OpRegistry::CreateOp(*grad_op_desc_);

  opbase->Run(*scope, platform::CPUPlace());

  // The output names are the same for every forward input, so fetch them once
  // instead of rebuilding the list on each iteration of the outer loop.
  const std::vector<std::string> grad_outvars =
      grad_op_desc_->OutputArgumentNames();

  // `ret` matches exactly with `input_vars_` of forward op.
  std::vector<Variable*> ret;
  ret.reserve(input_vars_->size());
  for (VarBase* origin_var : *input_vars_) {
    Variable* applied = nullptr;
    for (const std::string& outvar : grad_outvars) {
      const std::string& orig_var_name = grad_to_var_->at(outvar);
      if (origin_var->var_desc_->Name() != orig_var_name ||
          origin_var->stop_gradient_) {
        continue;
      }
      VLOG(3) << "apply grad " << outvar << " with origin " << orig_var_name;
      Variable* var = scope->FindVar(outvar);
      origin_var->ApplyGrad(scope, var);
      applied = var;
      // TODO(panyx0718): There might be another outvar with the same name.
      // In that case, it doesn't matter the first one or the second one is
      // used.
      break;
    }
    // Stays nullptr when no grad output was applied to this forward input.
    ret.push_back(applied);
  }
  return ret;
}

// Starts a backward pass from this variable.
//
// The variable's gradient is (re)created in `scope` under the name given by
// framework::GradVarName, with the dims of the forward value `var_` and every
// element set to 1.0. If an op is recorded as having produced this variable
// (`pre_op_`), gradients are then propagated through Autograd; otherwise only
// the gradient variable is created.
void VarBase::RunBackward(framework::Scope* scope) {
  const std::string grad_name = framework::GradVarName(var_desc_->Name());
  const framework::DDim& dims = var_->Get<framework::LoDTensor>().dims();
  grads_ = CreateVariable(grad_name, dims, 1.0, scope, /*random_name=*/false);

  if (pre_op_) {
    Autograd autograd(scope);
    autograd.RunBackward(this);
  }
}

}  // namespace imperative
}  // namespace paddle