/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <string>

#include "paddle/fluid/operators/optimizers/sgd_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

namespace paddle {
namespace operators {

class SGDOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

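  // Checks that all required inputs/outputs are present, that LearningRate
  // is an initialized scalar, and that a dense Grad matches Param's shape.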
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
                      platform::errors::NotFound(
                          "Input(Param) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Grad"), true,
        platform::errors::NotFound("Input(Grad) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
                      platform::errors::NotFound(
                          "Input(LearningRate) of SGDOp should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
                      platform::errors::NotFound(
                          "Output(ParamOut) of SGDOp should not be null."));

    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_NE(framework::product(lr_dims), 0,
                      platform::errors::NotFound(
                          "Maybe the Input variable LearningRate has not "
                          "been initialized. You may need to confirm "
                          "that exe.run(startup_program) is called "
                          "after optimizer.minimize."));
    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                      platform::errors::InvalidArgument(
                          "Learning rate should have 1 element, but "
                          "LearningRate has %s elements.",
                          framework::product(lr_dims)));
    auto param_dim = ctx->GetInputDim("Param");
    if (ctx->GetInputsVarType("Grad")[0] ==
        framework::proto::VarType::LOD_TENSOR) {
      PADDLE_ENFORCE_EQ(
          param_dim, ctx->GetInputDim("Grad"),
          platform::errors::InvalidArgument(
              "SGD Operator's input Param and Grad dimensions do not match. "
              "The Param %s shape is [%s], but the Grad %s shape is [%s].",
              ctx->Inputs("Param")[0], param_dim, ctx->Inputs("Grad")[0],
              ctx->GetInputDim("Grad")));
    }
    ctx->SetOutputDim("ParamOut", param_dim);
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Param");

#ifdef PADDLE_WITH_MKLDNN
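    // Prefer the MKL-DNN kernel when it is usable and the Param/Grad variable
    // types form one of the supported combinations checked below.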
    using mkldnn::memory;
    if (this->CanMKLDNNBeUsed(ctx, data_type)) {
      const auto *param_var = ctx.InputVar("Param");
      const auto *grad_var = ctx.InputVar("Grad");

      // supported cases
      bool dense_param_sparse_grad =
          param_var->IsType<framework::LoDTensor>() &&
          grad_var->IsType<framework::SelectedRows>();
      bool dense_param_and_grad = param_var->IsType<framework::LoDTensor>() &&
                                  grad_var->IsType<framework::LoDTensor>();

      if (dense_param_sparse_grad || dense_param_and_grad)
        return framework::OpKernelType(data_type, ctx.GetPlace(),
                                       framework::DataLayout::kMKLDNN,
                                       framework::LibraryType::kMKLDNN);
    }
#endif
    return framework::OpKernelType(data_type, ctx.device_context());
  }

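  // For LearningRate, keep the tensor's own type, place, and layout so the
  // scalar learning rate is never transformed; other inputs adopt only the
  // expected data type.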
  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const framework::Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "LearningRate") {
      return framework::OpKernelType(tensor.type(), tensor.place(),
                                     tensor.layout());
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

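// Infers the variable type of ParamOut from Param, so a SelectedRows
// parameter yields a SelectedRows output and a LoDTensor parameter yields a
// LoDTensor output.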
class SGDOpInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext *ctx) const override {
    auto in_var_type = ctx->GetInputType("Param");
    PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS ||
                          in_var_type == framework::proto::VarType::LOD_TENSOR,
                      true, platform::errors::InvalidArgument(
                                "The input Var's type should be LoDtensor or "
                                "SelectedRows, but the received type is %s",
                                in_var_type));

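    // ParamOut mirrors Param's variable type for every element of the output.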
    ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS);
  }
};

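// Declares the operator's inputs (Param, LearningRate, Grad), its output
// (ParamOut), the use_mkldnn attribute, and the user-facing documentation.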
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param", "(Tensor or SelectedRows) Input parameter");
    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
    AddInput("Grad", "(Tensor or SelectedRows) Input gradient");
    AddOutput("ParamOut",
              "(Tensor or SelectedRows, same with Param) "
              "Output parameter, should share the same memory with Param");
    AddAttr<bool>(
        "use_mkldnn",
        "(bool, default false) Indicates if MKL-DNN kernel will be used")
        .SetDefault(false);
    AddComment(R"DOC(

SGD operator

This operator implements one step of the stochastic gradient descent algorithm.

$$param\_out = param - learning\_rate * grad$$

)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
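// SGD is an optimizer op with no gradient of its own, hence the empty
// grad-op makers for both static graph (OpDesc) and imperative (OpBase)
// modes.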
REGISTER_OPERATOR(
    sgd, ops::SGDOp, ops::SGDOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
    ops::SGDOpInferVarType);
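// The CPU kernels registered below are defined in sgd_op.h and cover float,
// bfloat16, and double. For the dense Param/Grad case they boil down to the
// update sketched here (an illustrative sketch only; the variable names are
// assumptions, not the actual implementation):
//
//   // param_out[i] = param[i] - lr[0] * grad[i] for each element i
//   for (int64_t i = 0; i < numel; ++i) {
//     param_out[i] = param[i] - lr[0] * grad[i];
//   }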
REGISTER_OP_CPU_KERNEL(
    sgd, ops::SGDOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext,
                     paddle::platform::bfloat16>,
    ops::SGDOpKernel<paddle::platform::CPUDeviceContext, double>);