/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/run_program_op.h"

#include <string>

namespace paddle {
namespace operators {

class RunProgramOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
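    // Only the existence of inputs and outputs is checked here; the output
    // shapes are determined by actually running the wrapped program, so no
    // shape inference is performed.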
    PADDLE_ENFORCE_EQ(ctx->HasInputs("X"),
                      true,
                      platform::errors::NotFound(
                          "Input(X) of RunProgramOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasOutputs("Out"),
                      true,
                      platform::errors::NotFound(
                          "Output(Out) of RunProgramOp should not be null."));
  }

 protected:
  /* [Why use single type kernel]:
   *
   * This op is similar to a control flow op. It does not need an
   * op kernel, but it is implemented with one so that it can be
   * executed under dynamic graph mode.
   *
   * The kernel data type, whether int, float or any other type,
   * has no effect on the op's execution logic, so a data type is
   * simply specified directly here.
   *
   * Of course, the specific data type chosen is also unimportant.
   */
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::FP32,
                                   ctx.GetPlace());
  }

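  // Inputs are handed to the inner program unchanged, so no per-variable
  // data transform (dtype / layout / place) is needed; return the expected
  // kernel type as-is.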
  framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name,
      const phi::DenseTensor& tensor,
      const framework::OpKernelType& expected_kernel_type) const override {
    return expected_kernel_type;
  }
};

class RunProgramOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(vector<phi::DenseTensor>)"
             "The input tensors of RunProgram operator, also the feed targets "
             "of loaded program.")
        .AsDuplicable();
    AddInput("Params",
73
             "(vector<phi::DenseTensor or SelecetedRows>)"
74 75
             "The input parameter of RunProgram operator, also the parameters "
             "of the loaded program.")
76 77
        .AsDuplicable()
        .AsDispensable();
    AddOutput("Out",
              "(vector<phi::DenseTensor>)"
              "The output tensors of RunProgram operator, also the fetch "
              "targets of the loaded program.")
        .AsDuplicable();
    AddOutput("OutScope",
              "(StepScopeVar)"
              "A vector of execution scope in RunProgram operator, which "
              "contains at most one scope."
              "NOTE: Do not use Scope directly because Scope output is not "
              "currently supported.");
    AddOutput("DOut",
              "(vector<phi::DenseTensor>)"
              "The output tensors for GRAD Tensors in RunProgram forward "
              "operator, the forward operator contains GRAD Tensors when it "
              "computes double grad.")
        .AsDuplicable()
        .AsDispensable();
    AddOutput("CUDAGraph", "The output CUDA Graph when use_cuda_graph=True.")
        .AsDispensable();
    AddAttr<BlockDesc*>("global_block",
                        "(BlockDesc *)"
                        "The global block of executed program desc.");
    AddAttr<int64_t>("start_op_index",
                     "(int64_t)"
                     "The index of the op to start execution");
    AddAttr<int64_t>("end_op_index",
                     "(int64_t)"
                     "The index of the op to stop execution");
    AddAttr<bool>("is_test",
                  "(bool, default false) Set to true for inference only, false "
                  "for training.")
        .SetDefault(false);
    AddAttr<int64_t>(
        "program_id",
        "(int64_t)"
        "The unique hash id used as cache key for ExecutorInfoCache.");
    AddAttr<std::string>("cuda_graph_capture_mode",
                         "(str, default '') The CUDA Graph capture mode. "
                         "Default '' means no CUDA Graph capturing.")
        .SetDefault("");
    AddAttr<int64_t>("cuda_graph_pool_id",
                     "(int64_t, default 0) The CUDA Graph memory pool ID.")
        .SetDefault(0);
    AddAttr<bool>("use_interpretorcore",
                  "(bool, default false) Set to true for use interpretercore.")
        .SetDefault(false);
    AddAttr<BlockDesc*>("forward_global_block",
                        "(BlockDesc *)"
                        "The global block of executed forward program desc.")
        .SetDefault(nullptr);
    AddAttr<BlockDesc*>("backward_global_block",
                        "(BlockDesc *)"
                        "The global block of executed backward program desc.")
        .SetDefault(nullptr);
    AddAttr<std::vector<std::string>>("param_grad_names",
                                      "std::vector<std::string>"
                                      "The names of parameter gradients.")
        .SetDefault({});
    AddAttr<std::vector<std::string>>("out_grad_names",
                                      "std::vector<std::string>"
                                      "The names of output gradients.")
        .SetDefault({});
    AddComment(R"DOC(
RunProgram operator.

144 145
The RunProgram operator receives a program's feed targets, fetch targets,
and parameters, and receives the forward and backward program desc
146 147
as attributes, and then executes the program by executor.

148 149
NOTE: This operator is added so that the inference model stored by
`fluid.io.save_inference_model` under the static graph mode can be loaded
150
under the dynamic graph mode for fine-tuning or inferencing.
151

152 153 154 155 156 157 158 159 160
)DOC");
  }
};

class RunProgramGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInputs("X"),
                      true,
                      platform::errors::NotFound(
                          "Input(X) of RunProgramGradOp should not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInputs(framework::GradVarName("Out")),
        true,
        platform::errors::NotFound(
            "Input(Out@GRAD) of RunProgramGradOp should not be null."));
    // NOTE: X@GRAD and Params@GRAD may not exist,
    // because stop_gradient = True may be set on them
  }

 protected:
  /* see [Why use single type kernel] */
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::FP32,
                                   ctx.GetPlace());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name,
      const phi::DenseTensor& tensor,
      const framework::OpKernelType& expected_kernel_type) const override {
    return expected_kernel_type;
  }
};

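// FilterHelper removes gradient slots whose variables are not defined in
// the given block, so that no gradient output is wired up for parameters
// the program never uses. It is specialized for the two grad-maker
// instantiations: imperative::OpBase (dygraph, a traced list of VarBase)
// and framework::OpDesc (static graph, a list of variable names).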
template <typename T>
struct FilterHelper {};

template <>
struct FilterHelper<imperative::OpBase> {
  static void filter(const BlockDesc* desc,
                     imperative::TracedVarList<imperative::VarBase,
                                               imperative::kBackward>* vec) {
    auto f = [desc](std::shared_ptr<imperative::VarBase> ptr) {
      return !desc->HasVar(ptr->Name());
    };
    auto new_end = std::remove_if(vec->begin(), vec->end(), f);
    vec->resize(new_end - vec->begin());
  }
};

template <>
struct FilterHelper<framework::OpDesc> {
  static void filter(const BlockDesc* desc, std::vector<std::string>* vec) {
    auto f = [desc](const std::string& name) { return !desc->HasVar(name); };
    auto new_end = std::remove_if(vec->begin(), vec->end(), f);
    vec->resize(new_end - vec->begin());
  }
};

template <typename T>
class RunProgramGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
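  // Wires up the backward op: the forward inputs and outputs (including
  // OutScope and DOut) are re-fed as inputs so that backward can run in the
  // scope created by the forward pass, and Params@GRAD is filtered against
  // the global block (see FilterHelper above).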
  void Apply(GradOpPtr<T> grad_op) const override {
    grad_op->SetType("run_program_grad");
    grad_op->SetInput("X", this->Input("X"));
    grad_op->SetInput("Params", this->Input("Params"));
    grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    grad_op->SetInput("OutScope", this->Output("OutScope"));
    grad_op->SetInput("DOut", this->Output("DOut"));
    if (this->HasOutput("CUDAGraph")) {
      grad_op->SetInput("CUDAGraph", this->Output("CUDAGraph"));
    }
    grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));

    auto block_desc =
        PADDLE_GET_CONST(BlockDesc*, this->GetAttr("global_block"));
    auto params_grad = this->InputGrad("Params");
    FilterHelper<T>::filter(block_desc, &params_grad);  // filter the vector.
    grad_op->SetOutput(framework::GradVarName("Params"), params_grad);
    grad_op->SetAttrMap(this->Attrs());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(run_program,
                  ops::RunProgramOp,
                  ops::RunProgramOpMaker,
                  ops::RunProgramGradOpMaker<paddle::framework::OpDesc>,
                  ops::RunProgramGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(run_program_grad, ops::RunProgramGradOp);

/* see [Why use single type kernel] */
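// Only float CPU kernels are registered here; as noted above, the data type
// has no effect on the execution logic. Device kernels are presumably
// registered in the corresponding .cu file.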
REGISTER_OP_CPU_KERNEL(run_program,
                       ops::RunProgramOpKernel<phi::CPUContext, float>)
REGISTER_OP_CPU_KERNEL(run_program_grad,
                       ops::RunProgramGradOpKernel<phi::CPUContext, float>)