MKLDNNFcLayer.cpp 8.8 KB
Newer Older
T
tensor-tang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

15
#include "MKLDNNFcLayer.h"
T
tensor-tang 已提交
16
#include "paddle/utils/Logging.h"
T
tensor-tang 已提交
17

T
tensor-tang 已提交
18 19 20 21 22 23
using namespace mkldnn;  // NOLINT
// Short aliases for the mkldnn inner-product (fully-connected) primitives
// used throughout this file.
typedef memory::format format;
typedef inner_product_forward fc_fwd;
typedef inner_product_backward_weights fc_bwdWgt;
typedef inner_product_backward_data fc_bwdData;

namespace paddle {

// Register this layer type so it can be created from config by name.
REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer);
T
tensor-tang 已提交
27

28
bool MKLDNNFcLayer::init(const LayerMap& layerMap,
T
tensor-tang 已提交
29
                         const ParameterMap& parameterMap) {
30
  if (!MKLDNNLayer::init(layerMap, parameterMap)) {
T
tensor-tang 已提交
31 32 33
    return false;
  }

T
tensor-tang 已提交
34
  CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet";
T
tensor-tang 已提交
35 36 37 38 39 40 41
  CHECK_EQ(inputLayers_.size(), parameters_.size());
  CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet";

  // output size, cat not be changed
  oc_ = getSize();
  oh_ = 1;
  ow_ = 1;
42 43
  ih_ = 1;
  iw_ = 1;
T
tensor-tang 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59

  // input size can not change in FC
  iLayerSize_ = inputLayers_[0]->getSize();
  CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_);

  // create weight
  weight_ =
      std::unique_ptr<Weight>(new Weight(oc_, iLayerSize_, parameters_[0], 0));

  // create biases
  if (biasParameter_.get() != NULL) {
    biases_ = std::unique_ptr<Weight>(new Weight(1, oc_, biasParameter_));
  }
  return true;
}

60
void MKLDNNFcLayer::convertWeightsFromPaddle() {
T
tensor-tang 已提交
61
  if (hasInitedWgt_) {
T
tensor-tang 已提交
62 63 64
    return;
  }

T
tensor-tang 已提交
65 66 67 68 69
  CHECK(wgtVal_) << "should have been initialized";
  bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
  auto targetDim = wgtVal_->getDims();
  auto srcFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
  wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim);
T
tensor-tang 已提交
70 71 72
  hasInitedWgt_ = true;
}

73
void MKLDNNFcLayer::convertWeightsToPaddle() {
T
tensor-tang 已提交
74 75 76 77 78
  CHECK(wgtVal_) << "should have been initialized";
  bool hasNoSpatial_ = ih_ == 1 && iw_ == 1;
  auto targetDim = wgtVal_->getDims();
  auto dstFmt = hasNoSpatial_ ? memory::format::io : memory::format::ihwo;
  wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
T
tensor-tang 已提交
79 80
}

81 82 83
void MKLDNNFcLayer::reshape(
    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
  reshapeInput(bs, ih, iw);
84

T
tensor-tang 已提交
85
  CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
86 87 88
  ic = iLayerSize_ / (ih * iw);
  CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible";
  CHECK_EQ(size_t(oc), getSize());
T
tensor-tang 已提交
89

90 91
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc);
T
tensor-tang 已提交
92

93
  printSizeInfo();
T
tensor-tang 已提交
94 95
}

96 97 98 99 100 101
/**
 * Build the forward pass: wrap the input/weight/bias/output matrices as
 * MKLDNNMatrix buffers, create the inner-product forward primitive, and
 * push it onto the pipeline.
 *
 * @param pipeline  cleared, then filled with the forward primitive.
 * @param in/wgt/bias/out  [out] mkldnn-wrapped buffers for this layer.
 */
void MKLDNNFcLayer::resetFwd(std::vector<mkldnn::primitive>& pipeline,
                             MKLDNNMatrixPtr& in,
                             MKLDNNMatrixPtr& wgt,
                             MKLDNNMatrixPtr& bias,
                             MKLDNNMatrixPtr& out) {
  pipeline.clear();
  // Bias participates only when the bias weight exists.
  bool hasBias = biases_ && biases_->getW();
  const MatrixPtr& wgtVal = weight_->getW();
  const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr;
  const MatrixPtr& outVal = output_.value;

  if (inputIsOnlyMKLDNN()) {
    // Previous layer is mkldnn too: its output is already an MKLDNNMatrix.
    const MatrixPtr& inVal = getInputValue(0);
    in = std::dynamic_pointer_cast<MKLDNNMatrix>(inVal);
    CHECK(in) << "Input should be MKLDNNMatrix";
  } else {
    // Mixed devices: wrap the CPU input value in nchw format.
    CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
    const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE);
    in = MKLDNNMatrix::create(
        inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_);
  }
  // downSpatial() collapses trailing 1x1 spatial dims (e.g. nchw -> nc).
  in->downSpatial();
  wgt = MKLDNNMatrix::create(
      wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_);
  wgt->downSpatial();
  bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_)
                 : nullptr;
  out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_);

  // change original output value to mkldnn output value
  output_.value = std::dynamic_pointer_cast<Matrix>(out);
  if (!outputIsOnlyMKLDNN()) {
    // fc cpu output value do not need create convert
    // just share point
    getOutput(CPU_DEVICE).value->setData(output_.value->getData());
  }

  // create forward handle
  prop_kind pk = prop_kind::forward;
  // Two desc overloads: with and without the bias memory descriptor.
  fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk,
                                                in->getMemoryDesc(),
                                                wgt->getMemoryDesc(),
                                                bias->getMemoryDesc(),
                                                out->getMemoryDesc())
                                 : fc_fwd::desc(pk,
                                                in->getMemoryDesc(),
                                                wgt->getMemoryDesc(),
                                                out->getMemoryDesc());
  fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
  if (hasBias) {
    fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out));
  } else {
    fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out));
  }
  printValueFormatFlow();

  pipeline.push_back(*fwd_);
}

155 156 157 158 159 160
/**
 * Build the backward pass: a backward-weights primitive (always, when
 * reset is needed) and a backward-data primitive (only when the previous
 * layer wants an input gradient). Both are appended to the pipeline.
 *
 * Requires that resetFwd() ran first: inVal_/outVal_/wgtVal_/biasVal_
 * must already hold the forward primitive descriptors.
 *
 * @param pipeline  cleared, then filled with bwdWgt (and maybe bwdData).
 * @param in/wgt/bias/out  [out] mkldnn-wrapped gradient buffers.
 */
void MKLDNNFcLayer::resetBwd(std::vector<mkldnn::primitive>& pipeline,
                             MKLDNNMatrixPtr& in,
                             MKLDNNMatrixPtr& wgt,
                             MKLDNNMatrixPtr& bias,
                             MKLDNNMatrixPtr& out) {
  pipeline.clear();
  if (!needResetBwd_) {
    return;
  }
  needResetBwd_ = false;
  bool hasBias = biases_ && biases_->getWGrad();

  /// backward weight
  CHECK(inVal_) << "Should have input value";
  const MatrixPtr& wgtGrad = weight_->getWGrad();
  const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr;

  // TODO(TJ): merge outgrad
  int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
  // for MKLDNN device:
  // can not directly cast outputgrad to mkldnnmatrix,
  // since each layer can not write the inputgrad to mkldnn inputgrad.
  // So just create from matrix with outputvalue format.
  // for CPU device:
  // fc do not need to convert from cpu device since output is always nc format
  // only need create from cpu device
  const MatrixPtr& outGrad = getOutput(device).grad;
  // Gradients reuse the primitive descriptors captured during forward.
  out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc());
  wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc());
  bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc())
                 : nullptr;

  // create memory primitive desc
  // A forward "hint" PD is required to build the backward PDs below.
  fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward,
                                      inVal_->getMemoryDesc(),
                                      wgt->getMemoryDesc(),
                                      out->getMemoryDesc());
  fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
  fc_bwdWgt::desc bwdWgtDesc = hasBias
                                   ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
                                                     wgt->getMemoryDesc(),
                                                     bias->getMemoryDesc(),
                                                     out->getMemoryDesc())
                                   : fc_bwdWgt::desc(inVal_->getMemoryDesc(),
                                                     wgt->getMemoryDesc(),
                                                     out->getMemoryDesc());
  fc_bwdWgt::primitive_desc bwdWgtPD =
      fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD);

  if (hasBias) {
    bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias));
  } else {
    bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt));
  }
  pipeline.push_back(*bwdWgt_);

  /// backward data
  const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
  if (inGrad == nullptr) {
    // Previous layer does not need a gradient; skip backward-data.
    return;
  }
  if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) {
    // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
  } else {
    in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
  }

  fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(
      inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc());
  fc_bwdData::primitive_desc bwdDataPD =
      fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD);

  CHECK(wgtVal_) << "Should have weight memory";
  bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in));
  printGradFormatFlow();
  pipeline.push_back(*bwdData_);
}

233
void MKLDNNFcLayer::updateInputData() {
234
  inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
T
tensor-tang 已提交
235 236
}

237 238 239 240
/**
 * Apply the optimizer update to the weight parameter, and to the bias
 * parameter when a bias gradient exists.
 *
 * @param callback  update callback forwarded to Parameter::incUpdate.
 */
void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
  weight_->getParameterPtr()->incUpdate(callback);
  const bool hasBiasGrad = biases_ && biases_->getWGrad();
  if (hasBiasGrad) {
    biases_->getParameterPtr()->incUpdate(callback);
  }
}
T
tensor-tang 已提交
243
}  // namespace paddle