// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/layer.h"

#include <deque>
#include <limits>
#include <map>
#include <random>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/string/printf.h"

namespace paddle {
namespace imperative {

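// Slot names for PyLayer's forward input and output variables.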
const char* PyLayer::kFwdInp = "X";
const char* PyLayer::kFwdOut = "Out";

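// Global registry mapping a PyLayer function id to its registered Python
// callable (see PyLayer::RegisterFunc below).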
std::map<int, py::object> py_funcs_;

using framework::Variable;

namespace detail {

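// Place-dispatched visitor that accumulates x into y (y += x) using the BLAS
// AXPY routine of the device context that owns the given place.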
template <typename T>
class TensorAddToFunctor : public boost::static_visitor<> {
 public:
  TensorAddToFunctor(int64_t numel, const T* x, T* y)
      : numel_(numel), x_(x), y_(y) {}

  void operator()(const platform::CPUPlace& place) {
    platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
        platform::DeviceContextPool::Instance().Get(place));
    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }

#ifdef PADDLE_WITH_CUDA
  void operator()(const platform::CUDAPlace& place) {
    platform::CUDADeviceContext* ctx =
        dynamic_cast<platform::CUDADeviceContext*>(
            platform::DeviceContextPool::Instance().Get(place));
    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }
#else
  void operator()(const platform::CUDAPlace& place) {
    PADDLE_THROW("Gradient accumulation on place %s is not supported", place);
  }
#endif

  // There is no BLAS support for CUDAPinnedPlace, so gradient accumulation is
  // not supported there.
  void operator()(const platform::CUDAPinnedPlace& place) {
    PADDLE_THROW("Gradient accumulation on place %s is not supported", place);
  }

 private:
  int64_t numel_;
  const T* x_;
  T* y_;
};

}  // namespace detail

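// Accumulates the gradient held in src into dst (dst += src) on the given
// place. Both variables must hold float LoDTensors with the same number of
// elements.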
void AddTo(Variable* src, Variable* dst, platform::Place place) {
  framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
  framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();

  // FIXME(minqiyang): the loss_grad op passes an empty (zero-element) gradient
  // for the label; skip accumulation in that case as a temporary workaround.
  if (src_tensor->numel() == 0) {
    return;
  }

  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
                 "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                 src_tensor->numel());

  detail::TensorAddToFunctor<float> func(
      src_tensor->numel(), src_tensor->data<float>(),
      dst_tensor->mutable_data<float>(place));
  boost::apply_visitor(func, place);
}

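// Walks the dynamically recorded graph backwards from a root variable and
// applies each operator's grad once all of the operators that consumed its
// outputs have propagated their gradients (a reference-counted topological
// traversal driven by ComputeDepCounts).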
class Autograd {
 public:
  Autograd() {}

  void RunBackward(VarBase* var) {
    if (var->IsStopGradient()) {
      return;
    }
    VLOG(3) << "start autograd";

    std::deque<OpBase*> ready;
    ready.push_back(var->PreOp());

    std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->PreOp());

    while (!ready.empty()) {
      OpBase* ready_op = ready.front();
      ready.pop_front();
      std::map<std::string, std::vector<VarBase*>> input_grads =
          ready_op->ApplyGrad();

      for (auto it : input_grads) {
        const std::vector<VarBase*>& ingrads = it.second;
        for (size_t i = 0; i < ingrads.size(); ++i) {
          if (!ingrads[i]) continue;
          if (ready_op->input_vars_[it.first][i]->IsStopGradient()) {
            continue;
          }
          OpBase* pre_op = ready_op->pre_ops_[it.first][i];
          if (!pre_op) continue;

          dep_counts[pre_op] -= 1;
          PADDLE_ENFORCE(dep_counts[pre_op] >= 0);
          bool pre_op_ready = dep_counts[pre_op] == 0;
          if (pre_op_ready) {
            ready.push_back(pre_op);
          }
        }
      }
    }
  }

 private:
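  // Breadth-first traversal over pre_ops_ that counts, for every reachable
  // op, how many recorded edges point at it. RunBackward decrements these
  // counts to decide when an op becomes ready.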
  std::map<OpBase*, int> ComputeDepCounts(OpBase* op) {
    std::map<OpBase*, int> ret;

    std::deque<OpBase*> queue;
    queue.push_back(op);
    std::unordered_set<OpBase*> visited;
    visited.insert(op);
    while (!queue.empty()) {
      OpBase* candidate = queue.front();
      queue.pop_front();
      for (auto it : candidate->pre_ops_) {
        for (OpBase* pre_op : it.second) {
          if (!pre_op) continue;
          VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
                  << it.first << " <---- " << pre_op->op_desc_->Type();
          if (visited.find(pre_op) == visited.end()) {
            visited.insert(pre_op);
            queue.push_back(pre_op);
          }
          ret[pre_op] += 1;
        }
      }
    }
    return ret;
  }
};

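// Deep-copies this variable's LoDTensor to dst_place. When blocking is true
// the copy is synchronous and waits on the destination device context.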
std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
                                             const bool blocking) const {
  PADDLE_ENFORCE(var_->IsInitialized(),
                 "Variable must be initialized before it can be copied");

  std::unique_ptr<VarBase> new_var(new VarBase());
  framework::LoDTensor* tensor =
      new_var->var_->GetMutable<framework::LoDTensor>();
  tensor->Resize(var_->Get<framework::LoDTensor>().dims());
  tensor->set_lod(var_->Get<framework::LoDTensor>().lod());

  if (blocking) {
    platform::DeviceContext* dev_ctx =
        platform::DeviceContextPool::Instance().Get(dst_place);

    framework::TensorCopySync(var_->Get<framework::LoDTensor>(), dst_place,
                              tensor);

    dev_ctx->Wait();
  } else {
    framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
  }

  if (platform::is_gpu_place(dst_place)) {
    VLOG(3) << "copy tensor " << var_desc_->Name() << " to gpu";
  }

  return new_var;
}

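// Returns a mutable reference to this variable's gradient tensor.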
framework::LoDTensor& VarBase::GradValue() {
  VLOG(3) << "get var grad " << var_desc_->Name();
  return *(grads_->var_->GetMutable<framework::LoDTensor>());
}

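// Resets the gradient tensor to zero in place if it has already been
// allocated and initialized.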
void VarBase::ClearGradient() {
  VLOG(1) << "clear gradient of " << var_desc_->Name();
  if (grads_ && grads_->var_ && grads_->var_->IsInitialized()) {
    auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
    operators::math::set_constant(
        *(platform::DeviceContextPool::Instance().Get(
            grads_->var_->Get<framework::LoDTensor>().place())),
        grads_t, 0.0);
  }
}

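// Seeds this variable's gradient with ones and runs the autograd pass
// starting from the operator that produced it.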
void VarBase::RunBackward() {
  if (!pre_op_) return;

  VLOG(3) << "start backward";
  auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
  operators::math::set_constant(
      *(platform::DeviceContextPool::Instance().Get(
          var_->GetMutable<framework::LoDTensor>()->place())),
      grads_t, 1.0);

  PADDLE_ENFORCE(
      grads_ ==
      pre_op_->output_vars_[pre_op_out_name_][pre_op_out_idx_]->grads_);
  Autograd().RunBackward(this);
}

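// Runs the recorded grad op(s) for this forward op (or the registered Python
// backward function for a PyLayer op), writes their results into freshly
// allocated temporaries, and accumulates those temporaries into the existing
// gradient variables.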
std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
  if (grad_op_descs_.empty() && backward_id_ <= 0) {
    LOG(WARNING) << "op with no grad: " << op_desc_->Type();
    return {};
  }

  std::vector<framework::VariableValueMap> grad_outputs;
  if (backward_id_ > 0) {
    VLOG(3) << "py_layer_grad";
    grad_outputs.resize(1);
    grad_outputs[0][framework::GradVarName(PyLayer::kFwdOut)] =
        PyLayer::ApplyGrad(
            backward_id_,
            grad_input_vars_[0][framework::GradVarName(PyLayer::kFwdInp)]);
  } else {
    grad_outputs.resize(grad_op_descs_.size());
    for (size_t k = 0; k < grad_op_descs_.size(); ++k) {
      framework::OpDesc* grad_op_desc = grad_op_descs_[k];
      VLOG(3) << "op grad " << grad_op_desc->Type();
      for (auto it : grad_output_vars_[k]) {
        auto& outputs = grad_outputs[k][it.first];
        for (size_t i = 0; i < it.second.size(); ++i) {
          // Allocate a new variable
          Variable* tmp_var = new framework::Variable();
          tmp_var->GetMutable<framework::LoDTensor>();
          outputs.push_back(tmp_var);
        }
      }

      framework::RuntimeContext ctx(grad_input_vars_[k], grad_outputs[k]);

      // No need to do compile time infer shape here.
      // grad_op_desc_->InferShape(*block_);
      grad_op_desc->InferVarType(block_);

      std::unique_ptr<framework::OperatorBase> opbase =
          framework::OpRegistry::CreateOp(*grad_op_desc);
      framework::OperatorWithKernel* op_kernel =
          dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
      PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");

      framework::Scope scope;
      PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_);
      p.op.RuntimeInferShape(scope, place_, ctx);
      p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
    }
  }

  for (size_t k = 0; k < grad_output_vars_.size(); ++k) {
    for (auto it : grad_output_vars_[k]) {
      auto& outputs = grad_outputs[k][it.first];
      auto& origin_outputs = it.second;
      PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());

      for (size_t i = 0; i < outputs.size(); ++i) {
        framework::Variable* grad = outputs[i];
        framework::Variable* orig_grad = origin_outputs[i];
        AddTo(grad, orig_grad, place_);
        delete grad;
      }
    }
  }

  return input_vars_;
}

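// Registers a Python callable under func_id so that Apply (forward) and
// ApplyGrad (backward) can look it up later.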
void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {
  py_funcs_[func_id] = py_func;
}

int PyLayer::NumFuncs() { return py_funcs_.size(); }

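// Calls the Python function registered under func_id on the given inputs and
// wraps each returned tensor in a new VarBase.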
std::vector<VarBase*> PyLayer::Apply(int func_id,
                                     const std::vector<VarBase*>& inputs) {
  std::vector<framework::Variable*> invars;
  for (const VarBase* in : inputs) {
    invars.push_back(in->var_);
  }
  PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
  std::vector<Variable*> outvars = CallPythonFunc(py_funcs_[func_id], invars);
  std::vector<VarBase*> ret;
  for (Variable* v : outvars) {
    ret.push_back(new VarBase(v, new VarBase(true)));
  }
  return ret;
}

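// Calls the Python backward function registered under func_id on the given
// gradient inputs.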
std::vector<Variable*> PyLayer::ApplyGrad(
    int func_id, const std::vector<framework::Variable*>& inputs) {
  PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
  return CallPythonFunc(py_funcs_[func_id], inputs);
}

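// Invokes the Python callable under the GIL with the input tensors packed
// into a tuple and converts every returned LoDTensor back into a newly
// allocated framework::Variable.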
std::vector<framework::Variable*> PyLayer::CallPythonFunc(
    const py::object& callable, const std::vector<framework::Variable*>& ins) {
  py::gil_scoped_acquire guard;
  py::tuple in_args(ins.size());
  for (size_t i = 0; i < ins.size(); ++i) {
    const framework::LoDTensor& t = ins[i]->Get<framework::LoDTensor>();
    in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr);
  }
  VLOG(3) << "pyfunc in " << py::len(in_args);

  // TODO(panyx0718): Decide who owns the returned LoDTensor.
  auto ret = callable(in_args);
  auto ret_tuple = py::cast<py::tuple>(ret);
  size_t ret_num = py::len(ret_tuple);
  std::vector<framework::Variable*> outs;
  VLOG(3) << "pyfunc out " << ret_num;
  for (size_t i = 0; i < ret_num; ++i) {
    try {
      auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
      PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
                              "Output tensor %d should not be nullptr", i);
      auto* var = new framework::Variable();
      auto* tensor = var->GetMutable<framework::LoDTensor>();
      tensor->ShareDataWith(*py_out_tensor);
      tensor->set_lod(py_out_tensor->lod());
      outs.push_back(var);
    } catch (py::cast_error&) {
      PADDLE_THROW("The %d-th output must be LoDTensor", i);
    }
  }
  return outs;
}

}  // namespace imperative
}  // namespace paddle