From 2575b74feefd10dde3990452c53e595f83f08d52 Mon Sep 17 00:00:00 2001 From: wangyang59 Date: Tue, 1 Nov 2016 15:12:17 -0700 Subject: [PATCH] refactored ExpandConvLayer and ExpandConvTransLayer with ConvBaseLayerCpu --- paddle/gserver/layers/ConvBaseLayer.h | 27 +- paddle/gserver/layers/ConvBaseLayerCpu.cpp | 241 ++++++++++++++++++ paddle/gserver/layers/ConvBaseLayerCpu.h | 91 +++++++ paddle/gserver/layers/ConvTransBaseLayer.cpp | 88 ------- paddle/gserver/layers/ConvTransBaseLayer.h | 117 --------- paddle/gserver/layers/ExpandConvLayer.cpp | 219 +--------------- paddle/gserver/layers/ExpandConvLayer.h | 36 +-- .../gserver/layers/ExpandConvTransLayer.cpp | 195 +------------- paddle/gserver/layers/ExpandConvTransLayer.h | 56 +--- paddle/gserver/tests/test_LayerGrad.cpp | 2 + 10 files changed, 355 insertions(+), 717 deletions(-) create mode 100644 paddle/gserver/layers/ConvBaseLayerCpu.cpp create mode 100644 paddle/gserver/layers/ConvBaseLayerCpu.h delete mode 100644 paddle/gserver/layers/ConvTransBaseLayer.cpp delete mode 100644 paddle/gserver/layers/ConvTransBaseLayer.h diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 24927dec2..ecdc119a9 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -78,12 +78,7 @@ protected: /// of output size. bool caffeMode_; - /*The expandInput_ and transOutValue_ are used for CPU expand conv calc*/ - /// Expand one sample at a time. shape: - /// (numChannels * filterPixels_, outputSizeH * outputSizeW) - MatrixPtr expandInput_; - /// The transpose of output, which is an auxiliary matrix. - MatrixPtr transOutValue_; + public: explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} @@ -135,26 +130,6 @@ public: CHECK_GE(imageSize, 1); return imageSize; } - - /** - * Create or resize expandInput_. - */ - void resetExpandInput(size_t height, size_t width); - - /** - * Create or resize transOutValue_. 
- */ - void resetConvOutput(size_t batchSize, int inIdx); - - /** - * Add shared bias. - */ - void addSharedBias(); - - /** - * Add unshared bias. - */ - void addUnsharedBias(); }; } // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseLayerCpu.cpp b/paddle/gserver/layers/ConvBaseLayerCpu.cpp new file mode 100644 index 000000000..0da92bf04 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseLayerCpu.cpp @@ -0,0 +1,241 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#include "paddle/utils/Logging.h" +#include "ConvBaseLayerCpu.h" +namespace paddle { + +bool ConvBaseLayerCpu::init(const LayerMap &layerMap, + const ParameterMap &parameterMap) { + /* Initialize the basic convolutional parent class */ + ConvBaseLayer::init(layerMap, parameterMap); + + int channel; + /* Initialize the projection */ + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + subM_.push_back(numFilters_ / conf.groups()); + subN_.push_back(conf.output_x() * conf.output_x()); + channel = isConv_ ?
conf.channels() : numFilters_; + subK_.push_back(channel * conf.filter_size() * conf.filter_size() / + conf.groups()); + /* Consistent caffe mode for multiple input */ + caffeMode_ = conf.caffe_mode(); + } + + return true; +} + +void ConvBaseLayerCpu::resetExpandInput(size_t height, size_t width) { + Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); +} + +void ConvBaseLayerCpu::addSharedBias() { + size_t mapW = getSize() / numFilters_; + size_t mapH = getOutputValue()->getElementCnt() / mapW; + MatrixPtr out = + Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + out->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + transOutValue_->addBias(*bias, 1.0f); + + transOutValue_->reshape(mapW, mapH); + transOutValue_->transpose(out, false); // false means no memory allocation + + out->clear(); + bias->clear(); +} + +void ConvBaseLayerCpu::addUnsharedBias() { + MatrixPtr outValue = getOutputValue(); + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + outValue->addBias(*bias, 1.0f); +} + + +void ConvBaseLayerCpu::expandOneFrame(MatrixPtr image, size_t startIdx, + int inIdx) { + int channel = isConv_ ? 
channels_[inIdx] : numFilters_; + + resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); + real *imgData = image->getData() + startIdx * image->getWidth(); + MatrixPtr imageTmp = Matrix::create( + imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false, + useGpu_); + expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], + channel, filterSize_[inIdx], + filterSize_[inIdx], stride_[inIdx], stride_[inIdx], + padding_[inIdx], padding_[inIdx], + outputH_[inIdx], outputW_[inIdx]); + imageTmp->clear(); +} + +void ConvBaseLayerCpu::expandFwdOnce(MatrixPtr image, MatrixPtr out, + int inIdx, int startIdx) { + int subM = subM_[inIdx]; + int subN = subN_[inIdx]; + int subK = subK_[inIdx]; + + expandOneFrame(image, startIdx, inIdx); + + int nf = isConv_ ? numFilters_ : channels_[inIdx]; + + real *outData = + out->getData() + startIdx * subN * nf; + + real *wgtData = weights_[inIdx]->getW()->getData(); + real *expInData = expandInput_->getData(); + for (int g = 0; g < groups_[inIdx]; ++g) { + MatrixPtr A = + Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose + MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); + MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + wgtData += subK * subM; + expInData += subK * subN; + outData += subM * subN; + } +} + +void ConvBaseLayerCpu::bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx) { + int channel = isConv_ ? 
channels_[inpIdx] : numFilters_; + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + MatrixPtr tgtGrad = out; + + /* reset the expand-grad memory */ + resetExpandInput(subK * groups_[inpIdx], subN); + + real *localGradData = image->getData(); + real *tgtGradData = tgtGrad->getData(); + for (size_t n = 0; n < batchSize; n++) { + real *wgtData = weights_[inpIdx]->getW()->getData(); + real *expandInData = expandInput_->getData(); + + for (int g = 0; g < groups_[inpIdx]; g++) { + // create temporary matrix + MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); + MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); + C->mul(A, B); // mul + + // clear the temporary matrix + A->clear(); + B->clear(); + C->clear(); + + expandInData += subK * subN; + localGradData += subM * subN; + wgtData += subK * subM; + } + + // shrink one frame outGrad + MatrixPtr oneGradTmp = Matrix::create( + expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); + MatrixPtr vTmp = Matrix::create( + tgtGradData, 1, + imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false, + useGpu_); + vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], + channel, filterSize_[inpIdx], + filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], + padding_[inpIdx], padding_[inpIdx], + outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); + vTmp->clear(); + oneGradTmp->clear(); + + // move the data-pointer + tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel; + } +} + +void ConvBaseLayerCpu::bpropWeights(MatrixPtr image, MatrixPtr out, + int inpIdx) { + MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + resetExpandInput(subK * groups_[inpIdx], subN); + + real *gradData = 
out->getData(); + + for (size_t n = 0; n < batchSize; n++) { // frame by frame + // expand + expandOneFrame(image, n, inpIdx); + real *wGradData = weightGrad->getData(); + real *expandInData = expandInput_->getData(); + + // expand-mul one-group by one + for (int g = 0; g < groups_[inpIdx]; g++) { + MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); + MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + gradData += subM * subN; + wGradData += subK * subM; + expandInData += subK * subN; + } + } +} + +void ConvBaseLayerCpu::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { + size_t mapW = getSize() / numFilters_; + size_t mapH = v->getElementCnt() / mapW; + MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + vTmp->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + biases->collectBias(*transOutValue_, 1.0f); +} + +void ConvBaseLayerCpu::bpropBiases(MatrixPtr v) { + MatrixPtr biases = + Matrix::create(biases_->getWGrad()->getData(), 1, + biases_->getWGrad()->getElementCnt(), false, useGpu_); + if (sharedBiases_) { + bpropSharedBias(biases, v); + } else { + biases->collectBias(*v, 1.0f); + } + biases->clear(); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseLayerCpu.h b/paddle/gserver/layers/ConvBaseLayerCpu.h new file mode 100644 index 000000000..08a1426b4 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseLayerCpu.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include <vector> + +namespace paddle { + +/** + * @brief A subclass of ConvBaseLayer that is a superclass of both + * ExpandConvLayer and ExpandConvTransLayer + */ +class ConvBaseLayerCpu : public ConvBaseLayer { +protected: + /// For expand convolution. + /// subM_ = numFilters_ / groups_. + IntV subM_; + /// subN_ = outputH_ * outputW_. + IntV subN_; + /// subK_ = channels_ * filterPixels_ / groups_. + IntV subK_; + /// The spatial dimensions of height of input feature map. + IntV imgSizeH_; + /// The spatial dimensions of width of input feature map. + IntV imgSizeW_; + /// The spatial dimensions of height of output feature map. + IntV outputH_; + /// The spatial dimensions of width of output feature map. + IntV outputW_; + + /*The expandInput_ and transOutValue_ are used for CPU expand conv calc*/ + /// Expand one sample at a time. shape: + /// (numChannels * filterPixels_, outputSizeH * outputSizeW) + MatrixPtr expandInput_; + /// The transpose of output, which is an auxiliary matrix. + MatrixPtr transOutValue_; + +public: + explicit ConvBaseLayerCpu(const LayerConfig& config) + : ConvBaseLayer(config) {} + + ~ConvBaseLayerCpu() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + /** + * Create or resize expandInput_. + */ + void resetExpandInput(size_t height, size_t width); + + /** + * Add shared bias. + */ + void addSharedBias(); + + /** + * Add unshared bias. + */ + void addUnsharedBias(); + /** + * Expand one input sample.
+ */ + void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); + + /** + * Expand one input sample and perform matrix multiplication. + */ + void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx); + + void bpropSharedBias(MatrixPtr biases, MatrixPtr v); + void bpropBiases(MatrixPtr v); + void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx); + void bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvTransBaseLayer.cpp b/paddle/gserver/layers/ConvTransBaseLayer.cpp deleted file mode 100644 index 1b58b7fed..000000000 --- a/paddle/gserver/layers/ConvTransBaseLayer.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#include "paddle/utils/Logging.h" -#include "ConvTransBaseLayer.h" -namespace paddle { - -bool ConvTransBaseLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* Initialize the convolutional layer parameter */ - /* Everything is the same as ConvBaseLayer.cpp except that the meaning of - * num_filters and channel is switched. - * - * In the config, num_filters refer to the number of feature maps in the - * output of convTransLayer, and channel refer to the number of feature maps - * in the input of convTransLayer. 
- * - * However, within the convTrans class, the channel is related to the output - * and num_filters is related to the input, so that it is consistent with the - * settings in convLayer. - * */ - channel_ = config_.num_filters(); - sharedBiases_ = config_.shared_biases(); - for (auto& inputConfig : config_.inputs()) { - const ConvConfig& conf = inputConfig.conv_conf(); - padding_.push_back(conf.padding()); - stride_.push_back(conf.stride()); - filterSize_.push_back(conf.filter_size()); - paddingY_.push_back(conf.padding_y()); - strideY_.push_back(conf.stride_y()); - filterSizeY_.push_back(conf.filter_size_y()); - filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); - numFilters_.push_back(conf.channels()); - imgSize_.push_back(conf.img_size()); - imgPixels_.push_back(imgSize_.back() * imgSize_.back()); - groups_.push_back(conf.groups()); - filterChannels_.push_back(conf.filter_channels()); - outputX_.push_back(conf.output_x()); - outputs_.push_back(outputX_.back() * outputX_.back()); - } - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = numFilters_[i]; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - - /* initialize the biases_ */ - if (biasParameter_.get() != NULL) { - if (sharedBiases_) { - CHECK_EQ((size_t)channel_, biasParameter_->getSize()); - biases_ = - std::unique_ptr<Weight>(new Weight(channel_, 1, biasParameter_)); - } else { - biases_ = - std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_)); - } - } - - // default caffe model - caffeMode_ = true; - - return true; -} - -} // namespace paddle diff --git a/paddle/gserver/layers/ConvTransBaseLayer.h b/paddle/gserver/layers/ConvTransBaseLayer.h deleted file mode 100644 index d7acc184c..000000000 ---
a/paddle/gserver/layers/ConvTransBaseLayer.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#pragma once - -#include "Layer.h" -namespace paddle { - -/** - * @brief A Base Convolution Layer, which convolves the input image - * with learned filters and (optionally) adds biases. - */ - -class ConvTransBaseLayer : public Layer { -protected: - typedef std::vector<int> IntV; - - /// The number of channel in image (the output of the deconv layer). - int channel_; - /// The x dimension of the padding. - IntV padding_; - /// The y dimension of the padding. - IntV paddingY_; - /// The x dimension of the stride. - IntV stride_; - /// The y dimension of the stride. - IntV strideY_; - /// The x dimension of a filter kernel. - IntV filterSize_; - /// The y dimension of a filter kernel. - IntV filterSizeY_; - /// The number of filters(i.e. the number channels of the deconv layer input) - IntV numFilters_; - /// The spatial dimensions of input feature map. - IntV imgSize_; - /// The total pixel size of input feature map. - /// imgPixels_ = imgSizeX_ * imgSizeY_. - IntV imgPixels_; - /// filterPixels_ = filterSizeX_ * filterSizeY_. - IntV filterPixels_; - /// filterChannels_ = channels_/groups_. - IntV filterChannels_; - /// The spatial dimensions of output feature map. - IntV outputX_; - /// The spatial dimensions of output feature map.
- IntV outputs_; - /// Group size, refer to grouped convolution in - /// Alex Krizhevsky's paper: when group=2, the first half of the - /// filters are only connected to the first half of the input channels, - /// and the second half only connected to the second half. - IntV groups_; - /// Whether the bias is shared for feature in each channel. - bool sharedBiases_; - - /// shape of weight: (numChannels * filterPixels_, numFilters) - WeightList weights_; - /// If shared_biases is false shape of bias: (numFilters_, 1) - /// If shared_biases is ture shape of bias: - /// (numFilters_ * outputX * outputY, 1) - std::unique_ptr<Weight> biases_; - - /// True by default. The only difference is the calculation - /// of output size. - bool caffeMode_; - -public: - explicit ConvTransBaseLayer(const LayerConfig& config) : Layer(config) {} - - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - Weight& getWeight(int idx) { return *weights_[idx]; } - - /** - * Calculate image size based on caffeMode_ from outputSize.
- * - input(+padding): 0123456789 - * - imageSize(+padding) = 10; - * - filterSize = 3; - * - stride = 2; - * - caffeMode_ is true: - - output: (012), (234), (456), (678) - - outputSize = 4; - * - caffeMode_ is false: - * - output: (012), (234), (456), (678), (9) - * - outputSize = 5; - */ - - /* - * In order to be consistent with the convLayer, here the outputSize is - * actually the size of the input image of convTransLayer, and the image size - * is actually the size of the output image of convTransLayer - */ - int imageSize(int outputSize, int filterSize, int padding, int stride) { - int imageSize; - if (!caffeMode_) { - imageSize = - (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1; - } else { - imageSize = (outputSize - 1) * stride + filterSize - 2 * padding; - } - CHECK_GE(imageSize, 1); - return imageSize; - } -}; - -} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 866cd33c1..5c30c5a1f 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -24,32 +24,7 @@ REGISTER_LAYER(exconv, ExpandConvLayer); bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap &parameterMap) { /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - /* Initialize the projection */ - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - subM_.push_back(numFilters_ / conf.groups()); - subN_.push_back(conf.output_x() * conf.output_x()); - subK_.push_back(conf.channels() * conf.filter_size() * conf.filter_size() / - conf.groups()); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - } - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width =
numFilters_; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - + ConvBaseLayerCpu::init(layerMap, parameterMap); return true; } @@ -63,72 +38,6 @@ size_t ExpandConvLayer::getOutputSize() { return layerSize; } - -void ExpandConvLayer::expandOneFrame(MatrixPtr image, size_t startIdx, - int inIdx) { - resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); - real *imgData = image->getData() + startIdx * image->getWidth(); - MatrixPtr imageTmp = Matrix::create( - imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channels_[inIdx], false, - useGpu_); - expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], - channels_[inIdx], filterSize_[inIdx], - filterSize_[inIdx], stride_[inIdx], stride_[inIdx], - padding_[inIdx], padding_[inIdx], - outputH_[inIdx], outputW_[inIdx]); - imageTmp->clear(); -} - -void ExpandConvLayer::expandFwdOnce(MatrixPtr image, int inIdx, int startIdx) { - int subM = subM_[inIdx]; - int subN = subN_[inIdx]; - int subK = subK_[inIdx]; - - expandOneFrame(image, startIdx, inIdx); - - real *outData = - getOutputValue()->getData() + startIdx * subN * numFilters_; - - real *wgtData = weights_[inIdx]->getW()->getData(); - real *expInData = expandInput_->getData(); - for (int g = 0; g < groups_[inIdx]; ++g) { - MatrixPtr A = - Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose - MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); - MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - wgtData += subK * subM; - expInData += subK * subN; - outData += subM * subN; - } -} - -void ExpandConvLayer::addSharedBias() { - size_t mapW = getOutputValue()->getWidth() / numFilters_; - size_t mapH = getOutputValue()->getElementCnt() / mapW; - MatrixPtr out = - Matrix::create(getOutputValue()->getData(), mapH, 
mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - out->transpose(transOutValue_, false); // false means no memory allocation - transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, - numFilters_); - - MatrixPtr bias = - Matrix::create(biases_->getW()->getData(), 1, - biases_->getW()->getElementCnt(), false, useGpu_); - transOutValue_->addBias(*bias, 1.0f); - - transOutValue_->reshape(mapW, mapH); - transOutValue_->transpose(out, false); // false means no memory allocation - - - void ExpandConvLayer::forward(PassType passType) { Layer::forward(passType); @@ -145,7 +54,7 @@ void ExpandConvLayer::forward(PassType passType) { image = prevLayer->getOutputValue(); for (size_t off = 0; off < image->getHeight(); off++) { REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str()); - expandFwdOnce(image, i, off); + expandFwdOnce(image, getOutputValue(), i, off); } } /* add the bias-vector */ @@ -161,29 +70,6 @@ void ExpandConvLayer::forward(PassType passType) { forwardActivation(); } -void ExpandConvLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { - size_t mapW = v->getWidth() / numFilters_; - size_t mapH = v->getElementCnt() / mapW; - MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - vTmp->transpose(transOutValue_, false); // false means no memory allocation - vTmp->reshape(transOutValue_->getElementCnt() / numFilters_, numFilters_); - biases->collectBias(*vTmp, 1.0f); -} - -void ExpandConvLayer::bpropBiases(MatrixPtr v) { - MatrixPtr biases = - Matrix::create(biases_->getWGrad()->getData(), 1, - biases_->getWGrad()->getElementCnt(), false, useGpu_); - if (sharedBiases_) { - bpropSharedBias(biases, v); - } else { - biases->collectBias(*v, 1.0f); - } - biases->clear(); -} void ExpandConvLayer::backward(const UpdateCallback &callback) { backwardActivation(); @@ -197,109 +83,16 @@ void 
ExpandConvLayer::backward(const UpdateCallback &callback) { for (size_t i = 0; i != inputLayers_.size(); ++i) { /* First, calculate the input layers error */ - bpropActs(outGrad, i); + if (NULL != getPrev(i)->getOutputGrad()) { + bpropActs(outGrad, getPrev(i)->getOutputGrad(), i); + } if (weights_[i]->getWGrad()) { /* Then, calculate the W-gradient for the current layer */ - bpropWeights(outGrad, i); + bpropWeights(getPrev(i)->getOutputValue(), outGrad, i); /* Increasing the number of gradient */ weights_[i]->getParameterPtr()->incUpdate(callback); } } } -void ExpandConvLayer::bpropWeights(MatrixPtr v, int inpIdx) { - MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); - MatrixPtr inputV = getPrev(inpIdx)->getOutputValue(); - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = inputV->getHeight(); - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *gradData = v->getData(); - - for (size_t n = 0; n < batchSize; n++) { // frame by frame - // expand - expandOneFrame(inputV, n, inpIdx); - real *wGradData = weightGrad->getData(); - real *expandInData = expandInput_->getData(); - - // expand-mul one-group by one - for (int g = 0; g < groups_[inpIdx]; g++) { - MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); - MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - gradData += subM * subN; - wGradData += subK * subM; - expandInData += subK * subN; - } - } -} - -void ExpandConvLayer::bpropActs(MatrixPtr v, int inpIdx) { - LayerPtr prevLayer = getPrev(inpIdx); - if (NULL == prevLayer->getOutputGrad()) { - return; - } - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = v->getHeight(); - MatrixPtr tgtGrad = prevLayer->getOutputGrad(); - - /* reset the 
expand-grad memory */ - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *localGradData = v->getData(); - real *tgtGradData = tgtGrad->getData(); - for (size_t n = 0; n < batchSize; n++) { - real *wgtData = weights_[inpIdx]->getW()->getData(); - real *expandInData = expandInput_->getData(); - - for (int g = 0; g < groups_[inpIdx]; g++) { - // create temporary matrix - MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); - MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); - C->mul(A, B); // mul - - // clear the temporary matrix - A->clear(); - B->clear(); - C->clear(); - - expandInData += subK * subN; - localGradData += subM * subN; - wgtData += subK * subM; - } - - // shrink one frame outGrad - MatrixPtr oneGradTmp = Matrix::create( - expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); - MatrixPtr vTmp = Matrix::create( - tgtGradData, 1, - imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx], false, - useGpu_); - vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], - channels_[inpIdx], filterSize_[inpIdx], - filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], - padding_[inpIdx], padding_[inpIdx], - outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); - vTmp->clear(); - oneGradTmp->clear(); - - // move the data-pointer - tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx]; - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index f43b19949..5a4abec14 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once -#include "ConvBaseLayer.h" +#include "ConvBaseLayerCpu.h" #include "paddle/math/Matrix.h" #include <vector> @@ -28,24 +28,11 @@ namespace paddle { * * The config file api is img_conv_layer. */ -class ExpandConvLayer : public ConvBaseLayer { -protected: - /// For expand convolution. - /// subM_ = numFilters_ / groups_. - IntV subM_; - /// subN_ = outputH_ * outputW_. - IntV subN_; - /// subK_ = channels_ * filterPixels_ * groups_. - IntV subK_; - /// Expand one sample at a time. shape: - /// (numChannels * filterPixels_, outputSizeH * outputSizeW) - MatrixPtr expandInput_; - /// The transpose of output, which is an auxiliary matrix. - MatrixPtr transOutValue_; - +class ExpandConvLayer : public ConvBaseLayerCpu { public: - explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + explicit ExpandConvLayer(const LayerConfig& config) : + ConvBaseLayerCpu(config) {} ~ExpandConvLayer() {} @@ -53,23 +40,8 @@ public: size_t getOutputSize(); - /** - * Expand one input sample. - */ - void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); - - /** - * Expand one input sample and perform matrix multiplication.
- */ - void expandFwdOnce(MatrixPtr image, int inIdx, int startIdx); - - void forward(PassType passType); - void bpropSharedBias(MatrixPtr biases, MatrixPtr v); - void bpropBiases(MatrixPtr v); void backward(const UpdateCallback& callback); - void bpropWeights(MatrixPtr v, int inpIdx); - void bpropActs(MatrixPtr v, int inpIdx); }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.cpp b/paddle/gserver/layers/ExpandConvTransLayer.cpp index fb2e7fc4b..99eb18053 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.cpp +++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp @@ -29,18 +29,7 @@ REGISTER_LAYER(exconvt, ExpandConvTransLayer); bool ExpandConvTransLayer::init(const LayerMap &layerMap, const ParameterMap &parameterMap) { /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - /* Initialize the projection */ - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - subM_.push_back(conf.channels() / conf.groups()); - subN_.push_back(conf.output_x() * conf.output_x()); - subK_.push_back(numFilters_ * conf.filter_size() * conf.filter_size() / - conf.groups()); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - } + ConvBaseLayerCpu::init(layerMap, parameterMap); return true; } @@ -73,67 +62,6 @@ size_t ExpandConvTransLayer::getSize() { return layerSize; } -void ExpandConvTransLayer::resetExpandInput(size_t height, size_t width) { - Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); -} - -/*void ExpandConvTransLayer::resetConvOutput(size_t batchSize, int inIdx) { - Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx], - false, useGpu_); -}*/ - - - -void ExpandConvTransLayer::expandOneFrame(MatrixPtr image, size_t startIdx, - int inIdx) { - resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); - real *imgData = image->getData() + startIdx * image->getWidth(); -
MatrixPtr imageTmp = Matrix::create( - imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel_, false, - useGpu_); - expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], - channel_, filterSize_[inIdx], - filterSize_[inIdx], stride_[inIdx], stride_[inIdx], - padding_[inIdx], padding_[inIdx], - outputH_[inIdx], outputW_[inIdx]); - imageTmp->clear(); -} - -void ExpandConvTransLayer::expandBackOnce(MatrixPtr imageGrad, int inIdx, - int startIdx) { - int subM = subM_[inIdx]; - int subN = subN_[inIdx]; - int subK = subK_[inIdx]; - - LayerPtr prevLayer = getPrev(inIdx); - if (NULL == prevLayer->getOutputGrad()) { - return; - } - - expandOneFrame(imageGrad, startIdx, inIdx); - - real *outGradData = - prevLayer -> getOutputGrad()->getData() - + startIdx * subN * numFilters_[inIdx]; - - real *wgtData = weights_[inIdx]->getW()->getData(); - real *expInData = expandInput_->getData(); - for (int g = 0; g < groups_[inIdx]; ++g) { - MatrixPtr A = - Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose - MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); - MatrixPtr C = Matrix::create(outGradData, subM, subN, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - wgtData += subK * subM; - expInData += subK * subN; - outGradData += subM * subN; - } -} - void ExpandConvTransLayer::forward(PassType passType) { Layer::forward(passType); @@ -148,7 +76,7 @@ void ExpandConvTransLayer::forward(PassType passType) { LayerPtr prevLayer = getPrev(i); output = prevLayer->getOutputValue(); REGISTER_TIMER_INFO("shrinkFwd", getName().c_str()); - shrinkFwd(output, i); + bpropActs(output, getOutputValue(), i); } /* add the bias-vector */ @@ -164,84 +92,6 @@ void ExpandConvTransLayer::forward(PassType passType) { forwardActivation(); } -void ExpandConvTransLayer::shrinkFwd(MatrixPtr output, int inpIdx) { - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - - size_t batchSize = 
output->getHeight(); - MatrixPtr image = getOutputValue(); - - /* reset the expand-grad memory */ - resetExpandInput(subK * groups_[inpIdx], subN); - - real *localData = output->getData(); - real *imageData = image->getData(); - for (size_t n = 0; n < batchSize; n++) { - real *wgtData = weights_[inpIdx]->getW()->getData(); - real *expandInData = expandInput_->getData(); - - for (int g = 0; g < groups_[inpIdx]; g++) { - // create temporary matrix - MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(localData, subM, subN, false, useGpu_); - MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); - C->mul(A, B); // mul - - // clear the temporary matrix - A->clear(); - B->clear(); - C->clear(); - - expandInData += subK * subN; - localData += subM * subN; - wgtData += subK * subM; - } - - // shrink one frame outGrad - MatrixPtr oneTmp = Matrix::create( - expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); - MatrixPtr vTmp = Matrix::create( - imageData, 1, - imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel_, false, - useGpu_); - vTmp->convShrink(*oneTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], - channel_, filterSize_[inpIdx], - filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], - padding_[inpIdx], padding_[inpIdx], - outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); - vTmp->clear(); - oneTmp->clear(); - - // move the data-pointer - imageData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel_; - } -} - -void ExpandConvTransLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { - size_t mapW = getSize() / channel_; - size_t mapH = v->getElementCnt() / mapW; - MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - vTmp->transpose(transOutValue_, false); // false means no memory allocation - vTmp->reshape(transOutValue_->getElementCnt() / channel_, channel_); - biases->collectBias(*vTmp, 1.0f); -} 
- -void ExpandConvTransLayer::bpropBiases(MatrixPtr v) { - MatrixPtr biases = - Matrix::create(biases_->getWGrad()->getData(), 1, - biases_->getWGrad()->getElementCnt(), false, useGpu_); - if (sharedBiases_) { - bpropSharedBias(biases, v); - } else { - biases->collectBias(*v, 1.0f); - } - biases->clear(); -} - void ExpandConvTransLayer::backward(const UpdateCallback &callback) { backwardActivation(); @@ -255,51 +105,18 @@ void ExpandConvTransLayer::backward(const UpdateCallback &callback) { for (size_t i = 0; i != inputLayers_.size(); ++i) { /* First, calculate the input layers error */ for (size_t off = 0; off < imageGrad->getHeight(); off++) { - expandBackOnce(imageGrad, i, off); + if (NULL != getPrev(i)->getOutputGrad()) { + expandFwdOnce(imageGrad, getPrev(i)->getOutputGrad(), i, off); + } } if (weights_[i]->getWGrad()) { /* Then, calculate the W-gradient for the current layer */ - bpropWeights(imageGrad, i); + bpropWeights(imageGrad, getPrev(i)->getOutputValue(), i); /* Increasing the number of gradient */ weights_[i]->getParameterPtr()->incUpdate(callback); } } } -void ExpandConvTransLayer::bpropWeights(MatrixPtr v, int inpIdx) { - MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); - MatrixPtr outputV = getPrev(inpIdx)->getOutputValue(); - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = outputV->getHeight(); - resetExpandInput(subK * groups_[inpIdx], subN); - - real *outputData = outputV -> getData(); - - for (size_t n = 0; n < batchSize; n++) { // frame by frame - // expand - expandOneFrame(v, n, inpIdx); - real *wGradData = weightGrad->getData(); - real *expandInData = expandInput_->getData(); - - // expand-mul one-group by one - for (int g = 0; g < groups_[inpIdx]; g++) { - MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(outputData, subM, subN, true, useGpu_); - MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); - C->mul(A, B, 
1, 1); - - A->clear(); - B->clear(); - C->clear(); - outputData += subM * subN; - wGradData += subK * subM; - expandInData += subK * subN; - } - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h index cbe4da814..214f460d6 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.h +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once -#include "ConvBaseLayer.h" +#include "ConvBaseLayerCpu.h" #include "paddle/math/Matrix.h" #include @@ -24,32 +24,14 @@ namespace paddle { /** * @brief A subclass of convolution layer. * This layer expands input and use matrix multiplication to - * calculate convolution operation. + * calculate convolution transpose (deconv) operation. * * The config file api is img_convTrans_layer. */ -class ExpandConvTransLayer : public ConvBaseLayer { -protected: - /// For expand convolution. - /// subM_ = numFilters_ / groups_. - IntV subM_; - /// subN_ = outputH_ * outputW_. - IntV subN_; - /// subK_ = channels_ * filterPixels_ * groups_. - IntV subK_; - /// The spatial dimensions of height of input feature map. - IntV imgSizeH_; - /// The spatial dimensions of width of input feature map. - IntV imgSizeW_; - /// The spatial dimensions of height of output feature map. - IntV outputH_; - /// The spatial dimensions of width of output feature map. - IntV outputW_; - - +class ExpandConvTransLayer : public ConvBaseLayerCpu { public: explicit ExpandConvTransLayer(const LayerConfig& config) : - ConvBaseLayer(config) {} + ConvBaseLayerCpu(config) {} ~ExpandConvTransLayer() {} @@ -57,38 +39,8 @@ public: size_t getSize(); - /** - * Create or resize expandInput_. - */ - void resetExpandInput(size_t height, size_t width); - - /** - * Create or resize transOutValue_. - */ - void resetConvOutput(size_t batchSize, int inIdx); - - /** - * Expand one input sample. 
- */ - void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); - - /** - * Expand one output image and perform matrix multiplication. - */ - void expandBackOnce(MatrixPtr image, int inIdx, int startIdx); - - /** - * Perform matrix multiplication on one output and then shrink. - */ - void shrinkFwd(MatrixPtr output, int inpIdx); - - void forward(PassType passType); - void bpropSharedBias(MatrixPtr biases, MatrixPtr v); - void bpropBiases(MatrixPtr v); void backward(const UpdateCallback& callback); - void bpropWeights(MatrixPtr v, int inpIdx); - void bpropActs(MatrixPtr v, int inpIdx); }; } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index d634d198c..1c27ee2d5 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -302,6 +302,8 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.num_filters()); testLayerGrad(config, "conv", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); } TEST(Layer, convLayer) { -- GitLab