/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/fc_op.h"
#include <vector>
#include "paddle/fluid/operators/math/blas.h"

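// Runtime flag with the number of CPU threads; read below to decide whether
// the bias addition is parallelized with OpenMP.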
DECLARE_int32(paddle_num_threads);

namespace paddle {
namespace operators {

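// Checks that Input, W and (optionally) Bias have consistent shapes and sets
// the shape of Out to [batch_size, output_dim].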
void FCOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("Input"),
                 "X(Input) of Fully Connected should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Out"),
                 "Out(Output) of Fully Connected should not be null.");
  PADDLE_ENFORCE(ctx->HasInput("W"),
                 "W(Input) of Fully Connected should not be null.");
  // NCHW
  auto in_dims = ctx->GetInputDim("Input");
  // IO, I=C*H*W
  auto w_dims = ctx->GetInputDim("W");
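  // The FC output is a matrix of shape [batch_size, output_dim].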
  std::vector<int64_t> output_shape({in_dims[0], w_dims[1]});

  if (ctx->HasInput("Bias")) {
    auto bias_dims = ctx->GetInputDim("Bias");
    PADDLE_ENFORCE_EQ(bias_dims[0], 1, "The shape of Bias must be [1, dim].");
    PADDLE_ENFORCE_EQ(bias_dims[1], w_dims[1],
                      "The width of Bias must be equal to the width of W.");
  }
  PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4,
                 "Fully Connected input should be 2-D or 4-D tensor.");
  PADDLE_ENFORCE_EQ(w_dims.size(), 2UL,
                    "Fully Connected weight should be a 2-D tensor.");
  PADDLE_ENFORCE_EQ(framework::product(in_dims) / in_dims[0], w_dims[0],
                    "Fully Connected input and weigth size do not match.");

  ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
  ctx->ShareLoD("Input", "Out");
}

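// Selects the MKLDNN kernel and layout when use_mkldnn is set; otherwise the
// plain CPU kernel below is used.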
framework::OpKernelType FCOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library = framework::LibraryType::kPlain;
  framework::DataLayout layout = framework::DataLayout::kAnyLayout;
  if (ctx.Attr<bool>("use_mkldnn")) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
  }
  return framework::OpKernelType(
      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
      layout, library);
}

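// Each gradient output simply takes the shape of its forward counterpart.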
void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("W");

  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("W"))) {
    ctx->SetOutputDim(framework::GradVarName("W"), w_dims);
  }

  if (ctx->HasInput("Bias")) {
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")),
                   "Should have bias grad");
    auto bias_dims = ctx->GetInputDim("Bias");
    ctx->SetOutputDim(framework::GradVarName("Bias"), bias_dims);
  }
}

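// Kernel selection for the gradient op mirrors the forward op.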
framework::OpKernelType FCOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library = framework::LibraryType::kPlain;
  framework::DataLayout layout = framework::DataLayout::kAnyLayout;
  if (ctx.Attr<bool>("use_mkldnn")) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
  }
  return framework::OpKernelType(
      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
      layout, library);
}

void FCOpMaker::Make() {
  AddInput("Input",
           "(Tensor), The input tensor of fully connected operator with format "
           "(NCHW). ");
  AddInput("W", "(Tensor), The weight fc op with shape (I, O).");
  AddInput("Bias", "(Tensor, optional) Bias vector with shape (1 x O")
      .AsDispensable();
  AddOutput("Out", "(Tensor) The output tensor of fully connected operator. ");
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddComment(R"DOC(
  Fully Connected Operator.

  The fully connected operation calculates the output based on the input, the weights and the bias attribute.
  The size of each dimension of the parameters is checked in InferShape.
  The bias matrix is generated by the MKLDNN framework when bias_attr is True.
  Additional parameters are use_mkldnn and bias_attr.
  The input(X) size and output(Out) size may be different.
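
  In matrix form the operator computes (as implemented by the CPU kernel below):

      Out = Input * W + Bias

  where Input is viewed as a (N x I) matrix (a 4-D NCHW input is flattened
  with I = C * H * W), W has shape (I x O), and Bias is broadcast over the
  batch dimension.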

  The fully connected layer only supports the MKLDNN version.
)DOC");
}

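// Plain (non-MKLDNN) CPU kernel: computes Out = Input * W with a BLAS GEMM
// and then adds Bias row-wise, if present.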
template <typename T>
class FCOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");
    auto input = ctx.Input<Tensor>("Input");
    auto w = ctx.Input<Tensor>("W");
    auto bias = ctx.Input<Tensor>("Bias");
    auto output = ctx.Output<Tensor>("Out");
    auto in_dims = input->dims();
    auto w_dims = w->dims();

    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(dev_ctx);
    const T* input_data = input->data<T>();
    const T* w_data = w->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

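    // Out = Input * W: the input is treated as an [in_dims[0] x w_dims[0]]
    // matrix and multiplied by the [w_dims[0] x w_dims[1]] weight matrix.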
    blas.GEMM(CblasNoTrans, CblasNoTrans, in_dims[0], w_dims[1], w_dims[0],
              static_cast<T>(1), input_data, w_data, static_cast<T>(0),
              output_data);

    if (bias) {
      const T* bias_data = bias->data<T>();
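      // Add the bias row to every row of the output; the loop over the batch
      // is parallelized when more than one thread is configured.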
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
      for (int bs = 0; bs < in_dims[0]; bs++) {
        blas.AXPY(w_dims[1], static_cast<T>(1), bias_data,
                  output_data + bs * w_dims[1]);
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(fc, ops::FCOp, ops::FCOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(fc_grad, ops::FCOpGrad);
REGISTER_OP_CPU_KERNEL(fc, ops::FCOpKernel<float>, ops::FCOpKernel<double>);