/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <string>

#include "paddle/fluid/operators/optimizers/sgd_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

namespace paddle {
namespace operators {

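// SGDOp validates the presence of Param, Grad, LearningRate, and ParamOut,
// infers ParamOut's shape from Param, and selects the execution kernel.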
class SGDOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
                      platform::errors::NotFound(
                          "Input(Param) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Grad"), true,
        platform::errors::NotFound("Input(Grad) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
                      platform::errors::NotFound(
                          "Input(LearningRate) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
                      platform::errors::NotFound(
                          "Output(ParamOut) of SGDOp should not be null."));

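    // LearningRate must be initialized (a zero-sized dim product usually
    // means exe.run(startup_program) was never called) and must hold
    // exactly one element.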
    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_NE(phi::product(lr_dims), 0,
                      platform::errors::NotFound(
                          "Maybe the Input variable LearningRate has not "
                          "been initialized. You may need to confirm "
                          "if you put exe.run(startup_program) "
                          "after optimizer.minimize function."));
    PADDLE_ENFORCE_EQ(phi::product(lr_dims), 1,
                      platform::errors::InvalidArgument(
                          "Learning rate should have 1 element. But received "
                          "LearningRate dims [%s]",
                          phi::product(lr_dims)));
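    // A dense (LoDTensor) gradient must match the parameter's shape exactly.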
    auto param_dim = ctx->GetInputDim("Param");
    if (ctx->GetInputsVarType("Grad")[0] ==
        framework::proto::VarType::LOD_TENSOR) {
      PADDLE_ENFORCE_EQ(
          param_dim, ctx->GetInputDim("Grad"),
          platform::errors::InvalidArgument(
              "SGD Operator's input Param and Grad dimensions do not match. "
              "The Param %s shape is [%s], but the Grad %s shape is [%s].",
              ctx->Inputs("Param")[0], param_dim, ctx->Inputs("Grad")[0],
              ctx->GetInputDim("Grad")));
    }
    ctx->SetOutputDim("ParamOut", param_dim);
  }

 protected:
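  // Kernel dispatch is based on Param's data type; the oneDNN kernel is
  // preferred when it is available and the input types are supported.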
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Param");

#ifdef PADDLE_WITH_MKLDNN
    using dnnl::memory;
    if (this->CanMKLDNNBeUsed(ctx, data_type)) {
      const auto *param_var = ctx.InputVar("Param");
      const auto *grad_var = ctx.InputVar("Grad");

      // The oneDNN kernel supports a dense param with either a sparse
      // (SelectedRows) or a dense (LoDTensor) gradient.
      bool dense_param_sparse_grad =
          param_var->IsType<framework::LoDTensor>() &&
          grad_var->IsType<phi::SelectedRows>();
      bool dense_param_and_grad = param_var->IsType<framework::LoDTensor>() &&
                                  grad_var->IsType<framework::LoDTensor>();

      if (dense_param_sparse_grad || dense_param_and_grad)
        return framework::OpKernelType(data_type, ctx.GetPlace(),
                                       framework::DataLayout::kMKLDNN,
                                       framework::LibraryType::kMKLDNN);
    }
#endif
    return framework::OpKernelType(data_type, ctx.device_context());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const framework::Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const {
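    // Return the tensor's own kernel type for LearningRate so the framework
    // applies no data transform to it (e.g. no conversion to the oneDNN
    // layout).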
    if (var_name == "LearningRate") {
      return framework::OpKernelType(
          framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
          tensor.layout());
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

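// ParamOut keeps the same variable type (LOD_TENSOR or SELECTED_ROWS) as the
// incoming Param.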
class SGDOpInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext *ctx) const override {
    auto in_var_type = ctx->GetInputType("Param");
    PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS ||
                          in_var_type == framework::proto::VarType::LOD_TENSOR,
                      true, platform::errors::InvalidArgument(
                                "The input Var's type should be LoDtensor or "
                                "SelectedRows, but the received type is %s",
                                in_var_type));

    ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS);
  }
};

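// Declares SGD's inputs (Param, LearningRate, Grad, and the optional
// MasterParam used for AMP), its outputs, its attributes, and its docs.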
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param", "(Tensor or SelectedRows) Input parameter");
    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
    AddInput("Grad", "(Tensor or SelectedRows) Input gradient");
    AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable();
    AddOutput("ParamOut",
              "(Tensor or SelectedRows, same with Param) "
              "Output parameter, should share the same memory with Param");
    AddOutput("MasterParamOut",
              "The updated FP32 master weight for AMP. "
              "It shared memory with Input(MasterParam).")
        .AsDispensable();

    AddAttr<bool>(
        "use_mkldnn",
        "(bool, default false) Indicates if MKL-DNN kernel will be used")
        .SetDefault(false);
    AddAttr<bool>("multi_precision",
                  "(bool, default false) "
                  "Whether to use multi-precision during weight updating.")
        .SetDefault(false);

    AddComment(R"DOC(

SGD operator

This operator implements one step of the stochastic gradient descent algorithm.

$$param\_out = param - learning\_rate * grad$$
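
Both dense (LoDTensor) and sparse (SelectedRows) parameters and gradients
are supported.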

)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
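// SGD has no gradient op, so empty grad-op makers are registered for both the
// static-graph (OpDesc) and imperative (OpBase) modes.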
REGISTER_OPERATOR(
    sgd, ops::SGDOp, ops::SGDOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
    ops::SGDOpInferVarType);
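// CPU kernels are registered for float, bfloat16, and double.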
REGISTER_OP_CPU_KERNEL(
    sgd, ops::SGDOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext,
                     paddle::platform::bfloat16>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext, double>);