diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c9099297cc5c741fbae0b42f21b988e6c561ef11 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp @@ -0,0 +1,202 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNConcatLayer.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_concat, MKLDNNConcatLayer); + +bool MKLDNNConcatLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + CHECK_GT(inputLayers_.size(), 1UL); + CHECK(!biasParameter_); + return true; +} + +void MKLDNNConcatLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + ic = inputLayers_[0]->getSize() / ih / iw; + CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); + CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_GT(inputLayers_.size(), 1UL); + channels_.resize(inputLayers_.size()); + channels_[0] = ic; + // need change the output channel, so use oc_ instead + // TODO(TJ): change API, use &oc + oc_ = ic; + for (size_t i = 1; i < inputLayers_.size(); i++) { + int batchsize, height, witdh; + reshapeInput(batchsize, height, witdh, i); + CHECK_EQ(bs, batchsize); + CHECK_EQ(ih, height); + CHECK_EQ(iw, witdh); + + channels_[i] = inputLayers_[i]->getSize() / height / witdh; + CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize()); + oc_ += channels_[i]; + } + oh = ih; + ow = iw; + reshapeOutput(oh, ow); + resizeOutput(bs, oc_ * oh * ow); +} + +void MKLDNNConcatLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetFwdBuffers(inVals_, out); + in = inVals_[0]; + + std::shared_ptr fwdPD; + resetFwdPD(fwdPD, inVals_, out); + + resetFwdPipeline(pipeline, fwdPD, inVals_, out); +} + +void MKLDNNConcatLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetBwdBuffers(inGrads_, out); + in = inGrads_[0]; + + resetBwdPipeline(pipeline, bwds_, inGrads_, out); +} + +void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out) { + inputs.resize(inputLayers_.size()); + bool has8c = false, has16c = false, hasnc = false; + for (size_t i = 0; i < inputs.size(); i++) { + // resetInValue will use ic_ so temporary change as current input's channel + // TODO(TJ): change ic_ as vector then can remove channels_ + ic_ = channels_[i]; + resetInValue(inputs[i], nullptr, i); + CHECK(inputs[i]); + auto dm = inputs[i]->getDims(); + // inputs format can be different, but ndims must equal + CHECK(i == 0 || dm.size() == inputs[0]->getDims().size()); + CHECK_EQ(bs_, dm[0]); + CHECK_EQ(channels_[i], dm[1]); + if (dm.size() > 2) { + CHECK_EQ(ih_, dm[2]); + CHECK_EQ(iw_, dm[3]); + } + if (inputs[i]->getFormat() == format::nc) { + hasnc = true; + } + if (inputs[i]->getFormat() == format::nChw8c) { + has8c = true; + } + if (inputs[i]->getFormat() == format::nChw16c) { + has16c = true; + } + } + // change back, ic_ always save the input 0 size + ic_ = channels_[0]; + + format outFmt; + if (has16c && oc_ % 16 == 0) { + outFmt = format::nChw16c; + } else if (has8c && oc_ % 8 == 0) { + outFmt = format::nChw8c; + } else if (hasnc) { + CHECK(oh_ == 1 && ow_ == 1); + outFmt = format::nc; + } else { + outFmt = format::nchw; + } + memory::dims outDims = + hasnc ? memory::dims{bs_, oc_} : memory::dims{bs_, oc_, oh_, ow_}; + auto outPD = MKLDNNMatrix::createPrimitiveDesc(outDims, outFmt, engine_); + resetOutValue(out, outPD); +} + +void MKLDNNConcatLayer::resetFwdPD(std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr out) { + std::vector srcPDs; + for (size_t i = 0; i < inputs.size(); i++) { + srcPDs.push_back(inputs[i]->getPrimitiveDesc()); + } + CHECK(out); + pd.reset(new concat::primitive_desc(out->getMemoryDesc(), axis_, srcPDs)); + CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); +} + +void MKLDNNConcatLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + std::vector srcs; + for (size_t i = 0; i < inputs.size(); i++) { + srcs.push_back(*(inputs[i])); + } + fwd_.reset(new concat(*pd, srcs, *out)); + pipeline.push_back(*fwd_); +} + +void MKLDNNConcatLayer::resetBwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out) { + CHECK(outVal_); + resetOutGrad(out, outVal_->getPrimitiveDesc()); + CHECK(out); + + inputs.resize(inputLayers_.size()); + for (size_t i = 0; i < inputs.size(); i++) { + CHECK(inVals_[i]); + // resetInGrad will use inVal_ + // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_ + inVal_ = inVals_[i]; + resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); + CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc()); + } + // change back, inVal_ always save the input 0 + inVal_ = inVals_[0]; +} + +void MKLDNNConcatLayer::resetBwdPipeline( + std::vector& pipeline, + std::vector>& prims, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + // reset the backward primitives + memory::dims offsets = {0, 0, 0, 0}; + prims.resize(inputs.size()); + CHECK_EQ(inputs.size(), channels_.size()); + for (size_t i = 0; i < inputs.size(); i++) { + auto viewPD = view::primitive_desc( + out->getPrimitiveDesc(), inputs[i]->getDims(), offsets); + auto bwdPD = reorder::primitive_desc(viewPD.dst_primitive_desc(), + inputs[i]->getPrimitiveDesc()); + prims[i].reset(new reorder(bwdPD, *out, *(inputs[i]))); + offsets[axis_] += channels_[i]; + // push to pipeline + pipeline.push_back(*prims[i]); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..d5749d327e4259b81541a234f48a4538ab035fe4 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConcatLayer.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MKLDNNLayer Concatenate layer. + * + * The config file api is mkldnn_concat + */ +class MKLDNNConcatLayer : public MKLDNNLayer { +protected: + std::vector inVals_; + std::vector inGrads_; + std::vector> bwds_; + // input channel numbers + std::vector channels_; + + // concat_dimension in MKLDNN + // if axis_ == 0, concat batchsize + // if axis_ == 1, concat channel (default) + int axis_; + +public: + explicit MKLDNNConcatLayer(const LayerConfig& config) + : MKLDNNLayer(config), axis_(1) {} + + ~MKLDNNConcatLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void printSizeInfo() override { + CHECK_EQ(channels_.size(), inputLayers_.size()); + for (size_t i = 0; i < channels_.size(); ++i) { + VLOG(MKLDNN_SIZES) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << bs_ << ", " << channels_[i] << ", " << ih_ + << ", " << iw_; + } + VLOG(MKLDNN_SIZES) << "Output: " << bs_ << ", " << oc_ << ", " << oh_ + << ", " << ow_; + } + + void printValueFormat() override { + for (size_t i = 0; i < inVals_.size(); ++i) { + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << inVals_[i]->getFormat() << " >>>"; + } + if (outVal_) { + VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; + } + if (extOutVal_) { + VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); + } + } + + void printGradFormat() override { + if (extOutGrad_) { + VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); + } + if (outGrad_) { + VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; + } + for (size_t i = 0; i < inGrads_.size(); ++i) { + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << inGrads_[i]->getFormat() << "<<<"; + } + } + +protected: + /** + * Forward functions: reset buffers(inputs, output, bias), + * reset primitive descriptor, + * reset pipeline. + */ + void resetFwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr& out); + + /** + * Backward functions: reset buffers(inputs, output, bias) + * reset primitives and pipeline + */ + void resetBwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::vector>& prims, + std::vector& inputs, + MKLDNNMatrixPtr& out); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index 2125155c6cb807045c1a25f422dc072d0a401716..62af83561fda5ceca2df93eea05ab003323ef0a2 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -138,8 +138,11 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { } } -void MKLDNNLayer::reshapeInput(int& batchsize, int& height, int& width) { - const Argument& input = inputLayers_[0]->getOutput(); +void MKLDNNLayer::reshapeInput(int& batchsize, + int& height, + int& width, + size_t inputIdx) { + const Argument& input = inputLayers_[inputIdx]->getOutput(); batchsize = input.getBatchSize(); int h = input.getFrameHeight(); int w = input.getFrameWidth(); diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 7479c34c92b5231b2521493bc631474d4efd4224..4c42df1bee75fa7b28c2001c30797cc0df7c5554 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -178,7 +178,10 @@ protected: /** * reshape the input image sizes and input batchsize */ - void reshapeInput(int& batchsize, int& height, int& width); + void reshapeInput(int& batchsize, + int& height, + int& width, + size_t inputIdx = 0); /** * reshape output image sizes diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index a859e34c8996d81f14bf1edcb6e23d5a4f687e6b..42644e9601a82ea81c417adc6441edeb036998e2 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -313,6 +313,47 @@ TEST(MKLDNNLayer, AddtoLayer) { testAddtoLayer({4, 12, 1, 1}, 3); } +static void getMKLDNNConcatConfig(TestConfig& cfg, + const std::vector& inputs) { + CHECK_GE(inputs.size(), 2) << "at least two inputs"; + int oc = inputs[0].ic; + for (size_t i = 1; i < inputs.size(); ++i) { + CHECK_EQ(inputs[i].bs, inputs[0].bs); + CHECK_EQ(inputs[i].ih, inputs[0].ih); + CHECK_EQ(inputs[i].iw, inputs[0].iw); + oc += inputs[i].ic; + } + cfg.biasSize = 0; + cfg.layerConfig.set_type("mkldnn_concat"); + cfg.layerConfig.set_size(oc * inputs[0].ih * inputs[0].iw); + cfg.layerConfig.set_active_type("relu"); + for (size_t i = 0; i < inputs.size(); ++i) { + std::stringstream ss; + ss << "layer_" << i; + cfg.inputDefs.push_back( + {INPUT_DATA, + ss.str(), + (size_t)(inputs[i].ic) * inputs[i].ih * inputs[i].iw, + 0}); + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(inputs[i].ic); + img_conf->set_img_size_y(inputs[i].ih); + img_conf->set_img_size(inputs[i].iw); + } +} + +void testConcatLayer(const std::vector& inputs) { + TestConfig dnnConfig; + getMKLDNNConcatConfig(dnnConfig, inputs); + RUN_MKLDNN_TEST_LAYER(dnnConfig, "concat", inputs[0]) +} + +TEST(MKLDNNLayer, ConcatLayer) { + testConcatLayer({{64, 128, 1, 1}, {64, 32, 1, 1}, {64, 64, 1, 1}}); + testConcatLayer({{32, 100, 8, 8}, {32, 10, 8, 8}}); +} + void testActivation(std::string actType, const testImageDesc& pm) { // TODO(TJ): remove me when paddle support elu activation if (actType == "mkldnn_elu") { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5bd68e211ac1c8e05f40dc3ca37eef99f32af47f..22c5264f5d9ac697d4ab95aafd1060c0165884ea 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3506,11 +3506,17 @@ def ExpressionLayer(name, inputs, **xargs): @config_layer('concat') class ConcatenateLayer(LayerBase): + layer_type = 'concat' + def __init__(self, name, inputs, bias=False, **xargs): config_assert(inputs, 'inputs cannot be empty') config_assert(not bias, 'ConcatenateLayer cannot support bias.') + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + if self.layer_type == "mkldnn_concat": + config_assert(use_mkldnn, "mkldnn_concat only support MKLDNN") + self.layer_type = 'mkldnn_concat' if use_mkldnn else 'concat' super(ConcatenateLayer, self).__init__( - name, 'concat', 0, inputs=inputs, **xargs) + name, self.layer_type, 0, inputs=inputs, **xargs) size = 0 for input_index in xrange(len(self.inputs)): assert self.get_input_layer(0).height == self.get_input_layer( @@ -3530,6 +3536,11 @@ class ConcatenateLayer(LayerBase): self.set_layer_size(size) +@config_layer('mkldnn_concat') +class MKLDNNConcatLayer(ConcatenateLayer): + layer_type = 'mkldnn_concat' + + # like concat layer, but each input layer was processed by a Projection. @config_layer('concat2') class ConcatenateLayer2(LayerBase):