// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <memory>
#include <utility>
#include <vector>

#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/macros.h"

namespace paddle {
namespace imperative {

class VariableWrapper;

/** [ Basic hook classes ]
 *
 * @brief OpBasePreHook is executed before the grad OpBase runs. It takes
 *        the inputs of the current grad OpBase as its own inputs and executes
 *        Python hooks (user-defined) or C++ hooks (developer-defined), so
 *        that custom operations can be applied to the gradient of an
 *        interior VarBase.
 *
 * @note  OpBasePreHook will not change the input gradient VarBase.
 *
 * @note  [Why does it need to be an OpBase `PreHook` instead of a `PostHook`?]
 *
 *        With an OpBase post hook, the op's output gradient may not be in its
 *        final state when the op finishes executing, because other ops'
 *        gradient outputs may still need to be accumulated into it. In
 *        contrast, before an op can be executed, its input gradients must
 *        already have been accumulated to their final values.
 *
 * @note  [Why can it only be used for interior VarBase?]
 *
 *        A leaf VarBase's GradVarBase has no GradOpNode, so a leaf
 *        GradVarBase has no next OpBase to execute. Therefore OpBasePreHook
 *        cannot handle a leaf GradVarBase; that case is handled by
 *        GradAccumulatorPostHook instead.
 */
class OpBasePreHook {
 public:
  virtual ~OpBasePreHook() = default;
  virtual VariableWrapperList operator()(
      const VariableWrapperList& grad_inputs) = 0;
};
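
// Example (illustrative sketch, not part of this header): a developer-defined
// C++ pre hook would subclass OpBasePreHook; the `PrintGradPreHook` name
// below is hypothetical.
//
//   class PrintGradPreHook : public OpBasePreHook {
//    public:
//     VariableWrapperList operator()(
//         const VariableWrapperList& grad_inputs) override {
//       // inspect the incoming gradients here (e.g. log them), then return
//       // them; the input gradient VarBase itself is not changed
//       return grad_inputs;
//     }
//   };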

/**
 * @brief GradAccumulatorPostHook is the hook that operates on the current
 *        gradient after the GradientAccumulator has accumulated it.
 *        A leaf GradVarBase has no next OpBase, so to register a hook for it
 *        we also need to wait until the leaf GradVarBase's accumulation is
 *        completed; hence the post hook is added to the GradientAccumulator.
 *
 * @note  GradAccumulatorPostHook will change the grad VarBase value.
 *
 * @note  Only a leaf VarBase is allowed to hold a GradAccumulatorPostHook.
 */
class GradAccumulatorPostHook {
 public:
  virtual ~GradAccumulatorPostHook() = default;
  virtual void operator()(VariableWrapper* var) = 0;
};
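
// Example (illustrative sketch, not part of this header): a developer-defined
// C++ post hook would subclass GradAccumulatorPostHook and may modify the
// accumulated gradient in place; the `ClipGradPostHook` name below is
// hypothetical.
//
//   class ClipGradPostHook : public GradAccumulatorPostHook {
//    public:
//     void operator()(VariableWrapper* var) override {
//       // e.g. clip or rescale the accumulated gradient held by `var`
//     }
//   };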

/** [ Hook for cpp functions ]
 *
 * Here we design three C++ hooks:
 * 1. CppOpBasePreHook (to be implemented later):
 *    - used for developer-defined C++ interior VarBase hooks
 * 2. CppGradAccumulatorPostHook (to be implemented later):
 *    - used for developer-defined C++ leaf VarBase hooks
 * 3. LambdaGradAccumulatorPostHook:
 *    - used for VarBase reduce in parallel training
 *
 * @note  [Why do we need two types of GradAccumulatorPostHook?]
 *
 *        There are two types of gradient accumulation:
 *        1. Gradient accumulation within the same batch
 *        2. Gradient accumulation across batches
 *        The order of execution between hooks and gradient accumulation is:
 *
 *          [ Gradient accumulation within the same batch ]
 *                            |
 *                [ leaf GradVarBase hooks ]
 *                            |
 *          [ Gradient accumulation across batches ]
 *                            |
 *              [ Gradient reduce / allreduce ]
 *
 *        Because we currently intend to perform both kinds of accumulation
 *        in one GradientAccumulator, we must distinguish between the two
 *        types of hooks.
 *
 *        LambdaGradAccumulatorPostHook cannot be registered by users
 *        directly; it is currently only used to support the reduce strategy
 *        of parallel multi-card training.
 */
class LambdaGradAccumulatorPostHook : public GradAccumulatorPostHook {
 public:
  explicit LambdaGradAccumulatorPostHook(
      std::function<void(VariableWrapper*)> fn)
      : fn_(std::move(fn)) {}

  void operator()(VariableWrapper* var) override { fn_(var); }

 private:
  std::function<void(VariableWrapper*)> fn_;
};
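
// Example (illustrative sketch): wrapping a callable as a post hook.
// `AllReduceGrad` is a hypothetical reduce routine, used here only to show
// how parallel training could plug in a reduce step.
//
//   auto reduce_hook = std::unique_ptr<GradAccumulatorPostHook>(
//       new LambdaGradAccumulatorPostHook([](VariableWrapper* grad) {
//         // AllReduceGrad(grad);
//       }));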

/* Hooks for Python functions: in pybind/imperative.cc */

/** Python hooks to be added later:
 * - PyOpBasePreHook (to be implemented later): used for user-defined interior
 *   Python VarBase hooks
 * - PyGradAccumulatorPostHook (to be implemented later): used for user-defined
 *   leaf Python VarBase hooks
 */

/** [ Hook Pipeline classes ]
 *
 * @note  [Why do we need hook pipeline classes?]
 *
 *        There are two purposes for adding hook pipelines here:
 *
 *        1. Make the code implementation cleaner.
 *
 *          Without hook pipelines, we would need to add 3 hook vectors to
 *          VariableWrapper, 1 hook vector to OpBase, and 2 hook vectors to
 *          GradientAccumulator, like:
 *
 *          - VariableWrapper:
 *            std::vector<std::shared_ptr<OpBasePreHook>>
 *              interior_var_hooks_;
 *            std::vector<std::shared_ptr<GradAccumulatorPostHook>>
 *              leaf_var_hooks_;
 *            std::vector<std::shared_ptr<GradAccumulatorPostHook>>
 *              backward_hooks_;
 *
 *          - OpBase:
 *            std::vector<std::weak_ptr<OpBasePreHook>>
 *              interior_var_hooks_;
 *
 *          - GradientAccumulator:
 *            std::vector<std::weak_ptr<GradAccumulatorPostHook>>
 *              leaf_var_hooks_;
 *            std::vector<std::weak_ptr<GradAccumulatorPostHook>>
 *              backward_hooks_;
 *
 *          This seems more complicated, and std::vector<std::weak_ptr<...>>
 *          is not easy to destruct.
 *
 *        2. Make the code easier to understand.
 *
 *          From these two pipeline classes, we can clearly see that there
 *          are two types of hooks, one for interior gradient vars and one
 *          for leaf gradient vars inside the backward calculation graph.
 */

class InteriorVarHookPipeline {
 public:
  InteriorVarHookPipeline() = default;

  void add_hook(std::unique_ptr<OpBasePreHook>&& hook) {
    hooks_.emplace_back(std::move(hook));
  }

  const std::vector<std::unique_ptr<OpBasePreHook>>& hooks() const {
    return hooks_;
  }

  std::vector<std::unique_ptr<OpBasePreHook>>& hooks() { return hooks_; }

 private:
  std::vector<std::unique_ptr<OpBasePreHook>> hooks_;

  DISABLE_COPY_AND_ASSIGN(InteriorVarHookPipeline);
};
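
// Example (illustrative sketch): registering an interior-var pre hook and
// running the pipeline; `pre_hook` is assumed to be a
// std::unique_ptr<OpBasePreHook> built elsewhere, and `grad_inputs` a
// VariableWrapperList holding the grad OpBase inputs.
//
//   InteriorVarHookPipeline interior_pipeline;
//   interior_pipeline.add_hook(std::move(pre_hook));
//   for (auto& hook : interior_pipeline.hooks()) {
//     grad_inputs = (*hook)(grad_inputs);  // run pre hooks before the grad op
//   }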

class LeafVarHookPipeline {
 public:
  LeafVarHookPipeline() = default;

  void add_hook(std::unique_ptr<GradAccumulatorPostHook>&& hook) {
    hooks_.emplace_back(std::move(hook));
  }

  const std::vector<std::unique_ptr<GradAccumulatorPostHook>>& hooks() const {
    return hooks_;
  }

  std::vector<std::unique_ptr<GradAccumulatorPostHook>>& hooks() {
    return hooks_;
  }

  void add_backward_hook(std::unique_ptr<GradAccumulatorPostHook>&& hook) {
    backward_hooks_.emplace_back(std::move(hook));
  }

  const std::vector<std::unique_ptr<GradAccumulatorPostHook>>& backward_hooks()
      const {
    return backward_hooks_;
  }

  std::vector<std::unique_ptr<GradAccumulatorPostHook>>& backward_hooks() {
    return backward_hooks_;
  }

 private:
  std::vector<std::unique_ptr<GradAccumulatorPostHook>> hooks_;
  // NOTE: `backward` here means the whole backward process; the
  // `backward_hooks_` need to be executed after the whole backward process
  // has finished.
  std::vector<std::unique_ptr<GradAccumulatorPostHook>> backward_hooks_;

  DISABLE_COPY_AND_ASSIGN(LeafVarHookPipeline);
};
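
// Example (illustrative sketch): a leaf pipeline collecting both kinds of
// post hooks; `reduce_hook` and `final_hook` are hypothetical
// std::unique_ptr<GradAccumulatorPostHook> instances.
//
//   LeafVarHookPipeline leaf_pipeline;
//   // runs right after the leaf gradient is accumulated within a batch
//   leaf_pipeline.add_hook(std::move(reduce_hook));
//   // runs after the whole backward process has finished
//   leaf_pipeline.add_backward_hook(std::move(final_hook));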

}  // namespace imperative
}  // namespace paddle