/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <string>

#include "paddle/fluid/operators/optimizers/sgd_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

namespace paddle {
namespace operators {

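// The sgd operator: checks its inputs and outputs, infers the shape of
// ParamOut, and selects the kernel (plain CPU or oneDNN) to run.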
class SGDOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
                      platform::errors::NotFound(
                          "Input(Param) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Grad"), true,
        platform::errors::NotFound("Input(Grad) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
                      platform::errors::NotFound(
                          "Input(LearningRate) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
                      platform::errors::NotFound(
                          "Output(ParamOut) of SGDOp should not be null."));

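    // LearningRate must be an initialized 1-element tensor; a zero-sized
    // dim usually means startup_program was never run.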
    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_NE(framework::product(lr_dims), 0,
                      platform::errors::NotFound(
                          "The input variable LearningRate may not have been "
                          "initialized. Please confirm that "
                          "exe.run(startup_program) is called after "
                          "optimizer.minimize."));
    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                      platform::errors::InvalidArgument(
                          "Learning rate should have 1 element. But received "
                          "LearningRate dims [%s].",
                          lr_dims));
    auto param_dim = ctx->GetInputDim("Param");
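    // A dense (LoDTensor) gradient must match Param's shape exactly;
    // SelectedRows gradients carry only a subset of rows, so their
    // dims are not compared against Param.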
    if (ctx->GetInputsVarType("Grad")[0] ==
        framework::proto::VarType::LOD_TENSOR) {
      PADDLE_ENFORCE_EQ(
          param_dim, ctx->GetInputDim("Grad"),
          platform::errors::InvalidArgument(
              "SGD Operator's input Param and Grad dimensions do not match. "
              "The Param %s shape is [%s], but the Grad %s shape is [%s].",
              ctx->Inputs("Param")[0], param_dim, ctx->Inputs("Grad")[0],
              ctx->GetInputDim("Grad")));
    }
    ctx->SetOutputDim("ParamOut", param_dim);
  }

 protected:
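  // The kernel is keyed on Param's data type. With oneDNN enabled and a
  // supported param/grad storage combination, the MKL-DNN kernel is chosen.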
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Param");

#ifdef PADDLE_WITH_MKLDNN
    using dnnl::memory;
    if (this->CanMKLDNNBeUsed(ctx, data_type)) {
      const auto *param_var = ctx.InputVar("Param");
      const auto *grad_var = ctx.InputVar("Grad");

      // Supported cases: dense param with sparse (SelectedRows) grad,
      // and dense param with dense grad.
      bool dense_param_sparse_grad =
          param_var->IsType<framework::LoDTensor>() &&
          grad_var->IsType<pten::SelectedRows>();
      bool dense_param_and_grad = param_var->IsType<framework::LoDTensor>() &&
                                  grad_var->IsType<framework::LoDTensor>();

      if (dense_param_sparse_grad || dense_param_and_grad)
        return framework::OpKernelType(data_type, ctx.GetPlace(),
                                       framework::DataLayout::kMKLDNN,
                                       framework::LibraryType::kMKLDNN);
    }
#endif
    return framework::OpKernelType(data_type, ctx.device_context());
  }

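  // LearningRate keeps its native type, place, and layout so the scalar is
  // never transformed (e.g. into the MKL-DNN layout) to match the kernel.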
  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const framework::Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "LearningRate") {
      return framework::OpKernelType(tensor.type(), tensor.place(),
                                     tensor.layout());
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

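// Propagates Param's variable type (LoDTensor or SelectedRows) to ParamOut
// at compile time.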
class SGDOpInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext *ctx) const override {
    auto in_var_type = ctx->GetInputType("Param");
    PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS ||
                          in_var_type == framework::proto::VarType::LOD_TENSOR,
                      true, platform::errors::InvalidArgument(
                                "The input Var's type should be LoDtensor or "
                                "SelectedRows, but the received type is %s",
                                in_var_type));

    ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS);
  }
};

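// Describes the op's inputs, outputs, attributes, and documentation.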
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param", "(Tensor or SelectedRows) Input parameter");
    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
    AddInput("Grad", "(Tensor or SelectedRows) Input gradient");
    AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable();
    AddOutput("ParamOut",
              "(Tensor or SelectedRows, same with Param) "
              "Output parameter, should share the same memory with Param");
    AddOutput("MasterParamOut",
              "The updated FP32 master weight for AMP. "
              "It shared memory with Input(MasterParam).")
        .AsDispensable();

    AddAttr<bool>(
        "use_mkldnn",
        "(bool, default false) Indicates if MKL-DNN kernel will be used")
        .SetDefault(false);
    AddAttr<bool>("multi_precision",
                  "(bool, default false) "
                  "Whether to use multi-precision during weight updating.")
        .SetDefault(false);

    AddComment(R"DOC(

SGD operator

This operator implements one step of the stochastic gradient descent algorithm.

$$param\_out = param - learning\_rate * grad$$

)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
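// sgd has no gradient op, so empty grad-op makers are registered for both
// the static-graph (OpDesc) and imperative (OpBase) paths.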
REGISTER_OPERATOR(
    sgd, ops::SGDOp, ops::SGDOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
    ops::SGDOpInferVarType);
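// CPU kernels are instantiated for float, bfloat16, and double.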
REGISTER_OP_CPU_KERNEL(
    sgd, ops::SGDOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext,
                     paddle::platform::bfloat16>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext, double>);