Commit 5c88f072 authored by wangyang59

initial take on deconv layers

Parent 05204af1
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "ConvTransBaseLayer.h"
namespace paddle {
bool ConvTransBaseLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
/* Initialize the convolutional layer parameter */
channel_ = config_.num_filters();
sharedBiases_ = config_.shared_biases();
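// Note: the naming here mirrors the forward conv layer: channel_ holds
// config_.num_filters() (the deconv output channels), while numFilters_[i]
// below holds conf.channels() (the deconv input channels).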
for (auto& inputConfig : config_.inputs()) {
const ConvConfig& conf = inputConfig.conv_conf();
padding_.push_back(conf.padding());
stride_.push_back(conf.stride());
filterSize_.push_back(conf.filter_size());
paddingY_.push_back(conf.padding_y());
strideY_.push_back(conf.stride_y());
filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
numFilters_.push_back(conf.channels());
imgSize_.push_back(conf.img_size());
imgPixels_.push_back(imgSize_.back() * imgSize_.back());
groups_.push_back(conf.groups());
filterChannels_.push_back(conf.filter_channels());
outputX_.push_back(conf.output_x());
outputs_.push_back(outputX_.back() * outputX_.back());
}
/* initialize the weightList */
CHECK(inputLayers_.size() == parameters_.size());
for (size_t i = 0; i < inputLayers_.size(); i++) {
size_t height, width;
height = filterPixels_[i] * filterChannels_[i];
width = numFilters_[i];
// create a new weight
CHECK_EQ(parameters_[i]->getSize(), width * height);
Weight* w = new Weight(height, width, parameters_[i]);
weights_.emplace_back(w);
}
/* initialize the biases_ */
if (biasParameter_.get() != NULL) {
if (sharedBiases_) {
CHECK_EQ((size_t)channel_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(channel_, 1, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_));
}
}
// default to caffe mode
caffeMode_ = true;
return true;
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
 * @brief A base convolution-transpose layer, which applies transposed
 * convolution (deconvolution) to the input with learned filters and
 * (optionally) adds biases.
*/
class ConvTransBaseLayer : public Layer {
protected:
typedef std::vector<int> IntV;
/// The number of channels in the image (the output of the deconv layer).
int channel_;
/// The x dimension of the padding.
IntV padding_;
/// The y dimension of the padding.
IntV paddingY_;
/// The x dimension of the stride.
IntV stride_;
/// The y dimension of the stride.
IntV strideY_;
/// The x dimension of a filter kernel.
IntV filterSize_;
/// The y dimension of a filter kernel.
IntV filterSizeY_;
/// The number of filters (i.e. the number of channels of the deconv layer input).
IntV numFilters_;
/// The spatial dimension of the image (the output of the deconv layer).
IntV imgSize_;
/// The total pixel size of the image:
/// imgPixels_ = imgSizeX_ * imgSizeY_.
IntV imgPixels_;
/// filterPixels_ = filterSizeX_ * filterSizeY_.
IntV filterPixels_;
/// filterChannels_ = channels_/groups_.
IntV filterChannels_;
/// The spatial dimension of the output feature map (the input of the
/// deconv layer).
IntV outputX_;
/// The total pixel size of the output feature map:
/// outputs_ = outputX_ * outputX_.
IntV outputs_;
/// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the
/// filters are only connected to the first half of the input channels,
/// and the second half only connected to the second half.
IntV groups_;
/// Whether the bias is shared for feature in each channel.
bool sharedBiases_;
/// Shape of weight: (filterChannels_ * filterPixels_, numFilters_).
WeightList weights_;
/// If shared_biases is true, shape of bias: (channel_, 1).
/// If shared_biases is false, shape of bias:
/// (channel_ * imgSizeX * imgSizeY, 1).
std::unique_ptr<Weight> biases_;
/// True by default. The only difference is the calculation
/// of output size.
bool caffeMode_;
public:
explicit ConvTransBaseLayer(const LayerConfig& config) : Layer(config) {}
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
Weight& getWeight(int idx) { return *weights_[idx]; }
/**
* Calculate image size based on caffeMode_ from outputSize.
* - input(+padding): 0123456789
* - imageSize(+padding) = 10;
* - filterSize = 3;
* - stride = 2;
* - caffeMode_ is true:
 * - output: (012), (234), (456), (678)
 * - outputSize = 4;
* - caffeMode_ is false:
* - output: (012), (234), (456), (678), (9)
* - outputSize = 5;
*/
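// imageSize() inverts the forward-convolution output-size formula: given
// the deconv layer's input size (outputSize), it recovers the size of the
// deconv output image. E.g. with outputSize = 4, filterSize = 3,
// stride = 2, padding = 0 and caffeMode_ = true:
// (4 - 1) * 2 + 3 - 0 = 9.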
int imageSize(int outputSize, int filterSize, int padding, int stride) {
int imageSize;
if (!caffeMode_) {
imageSize =
(outputSize - 1) * stride + filterSize - 2 * padding - stride + 1;
} else {
imageSize = (outputSize - 1) * stride + filterSize - 2 * padding;
}
CHECK_GE(imageSize, 1);
return imageSize;
}
};
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "ExpandConvTransLayer.h"
namespace paddle {
REGISTER_LAYER(exconvt, ExpandConvTransLayer);
bool ExpandConvTransLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
/* Initialize the basic convolutional parent class */
ConvTransBaseLayer::init(layerMap, parameterMap);
/* Initialize the projection */
for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf();
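// Per-group GEMM dimensions used in forward/backward below:
// subM_ = input channels per group, subN_ = number of spatial positions
// of the deconv input (output_x in the shared conv config), and
// subK_ = channel_ * filterPixels / groups (the rows of the expanded
// image matrix).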
subM_.push_back(conf.channels() / conf.groups());
subN_.push_back(conf.output_x() * conf.output_x());
subK_.push_back(channel_ * conf.filter_size() * conf.filter_size() /
conf.groups());
/* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode();
}
return true;
}
// Why is this necessary after calling init?
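// Presumably because the previous layer's frame height/width are only
// known at runtime: getSize() re-reads them on each pass and falls back
// to the static outputX_ from the config when they are unset.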
size_t ExpandConvTransLayer::getSize() {
CHECK_NE(inputLayers_.size(), 0UL);
imgSizeH_.clear();
imgSizeW_.clear();
outputH_.clear();
outputW_.clear();
subN_.clear();
size_t layerSize = 0;
for (size_t i = 0; i < inputLayers_.size(); i++) {
outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
if (outputH_[i] == 0) outputH_[i] = outputX_[i];
if (outputW_[i] == 0) outputW_[i] = outputX_[i];
imgSizeH_.push_back(
imageSize(outputH_[i], filterSize_[i], padding_[i], stride_[i]));
imgSizeW_.push_back(
imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
subN_.push_back(outputH_[i] * outputW_[i]);
CHECK(layerSize == 0 ||
imgSizeH_[i] * imgSizeW_[i] * (size_t)channel_ == layerSize);
layerSize = imgSizeH_[i] * imgSizeW_[i] * channel_;
}
getOutput().setFrameHeight(imgSizeH_[0]);
getOutput().setFrameWidth(imgSizeW_[0]);
return layerSize;
}
void ExpandConvTransLayer::resetExpandInput(size_t height, size_t width) {
Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}
/*void ExpandConvTransLayer::resetConvOutput(size_t batchSize, int inIdx) {
Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx],
false, useGpu_);
}*/
void ExpandConvTransLayer::addSharedBias() {
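// The output stores one sample per row, laid out as channel_ contiguous
// blocks of imgPixels each; the transpose and reshape below group the
// elements by channel so that a single (1, channel_) bias row can be
// added to every position.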
size_t mapW = getSize() / channel_;
size_t mapH = getOutputValue()->getElementCnt() / mapW;
MatrixPtr out =
Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_);
Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);
out->transpose(transOutValue_, false); // false means no memory allocation
transOutValue_->reshape(transOutValue_->getElementCnt() / channel_,
channel_);
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
transOutValue_->addBias(*bias, 1.0f);
transOutValue_->reshape(mapW, mapH);
transOutValue_->transpose(out, false); // false means no memory allocation
out->clear();
bias->clear();
}
void ExpandConvTransLayer::addUnsharedBias() {
MatrixPtr outValue = getOutputValue();
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
outValue->addBias(*bias, 1.0f);
}
void ExpandConvTransLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
int inIdx) {
resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
real *imgData = image->getData() + startIdx * image->getWidth();
MatrixPtr imageTmp = Matrix::create(
imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel_, false,
useGpu_);
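// convExpand performs an im2col-style expansion: each column of
// expandInput_ gathers the filterSize x filterSize x channel_ patch of
// the image covered by one sliding-window position.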
expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
channel_, filterSize_[inIdx],
filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
padding_[inIdx], padding_[inIdx],
outputH_[inIdx], outputW_[inIdx]);
imageTmp->clear();
}
void ExpandConvTransLayer::expandBackOnce(MatrixPtr imageGrad, int inIdx,
int startIdx) {
int subM = subM_[inIdx];
int subN = subN_[inIdx];
int subK = subK_[inIdx];
LayerPtr prevLayer = getPrev(inIdx);
if (NULL == prevLayer->getOutputGrad()) {
return;
}
expandOneFrame(imageGrad, startIdx, inIdx);
real *outGradData = prevLayer->getOutputGrad()->getData() +
    startIdx * subN * numFilters_[inIdx];
real *wgtData = weights_[inIdx]->getW()->getData();
real *expInData = expandInput_->getData();
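// The gradient w.r.t. the deconv input is an ordinary convolution
// forward: per group, C (subM x subN) += W^T * B, where W is the
// (subK x subM) weight block (created with the transpose flag) and B is
// the im2col expansion of the output gradient.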
for (int g = 0; g < groups_[inIdx]; ++g) {
MatrixPtr A =
Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose
MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
MatrixPtr C = Matrix::create(outGradData, subM, subN, false, useGpu_);
C->mul(A, B, 1, 1);
A->clear();
B->clear();
C->clear();
wgtData += subK * subM;
expInData += subK * subN;
outGradData += subM * subN;
}
}
void ExpandConvTransLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
/* note: one sample corresponds to one column, and in
 * transOutValue one sample corresponds to one row */
int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
resetOutput(batchSize, getSize());
MatrixPtr output = nullptr;
for (size_t i = 0; i != inputLayers_.size(); ++i) {
LayerPtr prevLayer = getPrev(i);
output = prevLayer->getOutputValue();
REGISTER_TIMER_INFO("shrinkFwd", getName().c_str());
shrinkFwd(output, i);
}
/* add the bias-vector */
if (biases_.get() != NULL) {
if (sharedBiases_) {
addSharedBias();
} else {
addUnsharedBias();
}
}
/* activation */
forwardActivation();
}
void ExpandConvTransLayer::shrinkFwd(MatrixPtr output, int inpIdx) {
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = output->getHeight();
MatrixPtr image = getOutputValue();
/* reset the expand-grad memory */
resetExpandInput(subK * groups_[inpIdx], subN);
real *localData = output->getData();
real *imageData = image->getData();
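// Forward pass of the transposed convolution: per group, compute the
// expanded columns C (subK x subN) = W (subK x subM) * input
// (subM x subN), then convShrink (a col2im-style reduction) accumulates
// the columns back into the output image.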
for (size_t n = 0; n < batchSize; n++) {
real *wgtData = weights_[inpIdx]->getW()->getData();
real *expandInData = expandInput_->getData();
for (int g = 0; g < groups_[inpIdx]; g++) {
// create temporary matrix
MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
MatrixPtr B = Matrix::create(localData, subM, subN, false, useGpu_);
MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
C->mul(A, B); // C = A * B
// clear the temporary matrix
A->clear();
B->clear();
C->clear();
expandInData += subK * subN;
localData += subM * subN;
wgtData += subK * subM;
}
// shrink one frame outGrad
MatrixPtr oneTmp = Matrix::create(
expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
MatrixPtr vTmp = Matrix::create(
imageData, 1,
imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel_, false,
useGpu_);
vTmp->convShrink(*oneTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
channel_, filterSize_[inpIdx],
filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
padding_[inpIdx], padding_[inpIdx],
outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
vTmp->clear();
oneTmp->clear();
// move the data-pointer
imageData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel_;
}
}
void ExpandConvTransLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
size_t mapW = getSize() / channel_;
size_t mapH = v->getElementCnt() / mapW;
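// Same transpose-and-reshape trick as addSharedBias(): group the
// elements of v by channel so each column of the reshaped matrix
// corresponds to one channel, then sum each column into the shared bias
// gradient.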
MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);
Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);
vTmp->transpose(transOutValue_, false); // false means no memory allocation
transOutValue_->reshape(transOutValue_->getElementCnt() / channel_, channel_);
biases->collectBias(*transOutValue_, 1.0f);
}
void ExpandConvTransLayer::bpropBiases(MatrixPtr v) {
MatrixPtr biases =
Matrix::create(biases_->getWGrad()->getData(), 1,
biases_->getWGrad()->getElementCnt(), false, useGpu_);
if (sharedBiases_) {
bpropSharedBias(biases, v);
} else {
biases->collectBias(*v, 1.0f);
}
biases->clear();
}
void ExpandConvTransLayer::backward(const UpdateCallback &callback) {
backwardActivation();
MatrixPtr imageGrad = getOutputGrad();
if (biases_ && biases_->getWGrad()) {
bpropBiases(imageGrad);
/* Increase the gradient update count */
biases_->getParameterPtr()->incUpdate(callback);
}
for (size_t i = 0; i != inputLayers_.size(); ++i) {
/* First, calculate the input layers error */
for (size_t off = 0; off < imageGrad->getHeight(); off++) {
expandBackOnce(imageGrad, i, off);
}
if (weights_[i]->getWGrad()) {
/* Then, calculate the W-gradient for the current layer */
bpropWeights(imageGrad, i);
/* Increase the gradient update count */
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
}
void ExpandConvTransLayer::bpropWeights(MatrixPtr v, int inpIdx) {
MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();
MatrixPtr outputV = getPrev(inpIdx)->getOutputValue();
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = outputV->getHeight();
resetExpandInput(subK * groups_[inpIdx], subN);
real *outputData = outputV->getData();
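// Weight gradient: expand the deconv image v via im2col, then per group
// accumulate C (subK x subM) += A (subK x subN) * B^T, where B is the
// (subM x subN) input-value block created with the transpose flag.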
for (size_t n = 0; n < batchSize; n++) { // frame by frame
// expand
expandOneFrame(v, n, inpIdx);
real *wGradData = weightGrad->getData();
real *expandInData = expandInput_->getData();
// expand-mul one-group by one
for (int g = 0; g < groups_[inpIdx]; g++) {
MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
MatrixPtr B = Matrix::create(outputData, subM, subN, true, useGpu_);
MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
C->mul(A, B, 1, 1);
A->clear();
B->clear();
C->clear();
outputData += subM * subN;
wGradData += subK * subM;
expandInData += subK * subN;
}
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvTransBaseLayer.h"
#include "paddle/math/Matrix.h"
#include <vector>
namespace paddle {
/**
 * @brief A subclass of the convolution-transpose base layer.
 * This layer expands the input and uses matrix multiplication to
 * calculate the transposed-convolution operation.
*
* The config file api is img_conv_layer.
*/
class ExpandConvTransLayer : public ConvTransBaseLayer {
protected:
/// For expand convolution.
/// subM_ = numFilters_ / groups_.
IntV subM_;
/// subN_ = outputH_ * outputW_.
IntV subN_;
/// subK_ = channels_ * filterPixels_ / groups_.
IntV subK_;
/// The height of the image (the output of the deconv layer).
IntV imgSizeH_;
/// The width of the image (the output of the deconv layer).
IntV imgSizeW_;
/// The height of the output feature map (the input of the deconv layer).
IntV outputH_;
/// The width of the output feature map (the input of the deconv layer).
IntV outputW_;
/// Expand one sample at a time. shape:
/// (numChannels * filterPixels_, outputSizeH * outputSizeW)
MatrixPtr expandInput_;
/// The transpose of output, which is an auxiliary matrix.
MatrixPtr transOutValue_;
public:
explicit ExpandConvTransLayer(const LayerConfig& config) :
ConvTransBaseLayer(config) {}
~ExpandConvTransLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
size_t getSize();
/**
* Create or resize expandInput_.
*/
void resetExpandInput(size_t height, size_t width);
/**
* Create or resize transOutValue_.
*/
void resetConvOutput(size_t batchSize, int inIdx);
/**
* Expand one input sample.
*/
void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);
/**
* Expand one output image and perform matrix multiplication.
*/
void expandBackOnce(MatrixPtr image, int inIdx, int startIdx);
/**
* Perform matrix multiplication on one output and then shrink.
*/
void shrinkFwd(MatrixPtr output, int inpIdx);
/**
* Add shared bias.
*/
void addSharedBias();
/**
* Add unshared bias.
*/
void addUnsharedBias();
void forward(PassType passType);
void bpropSharedBias(MatrixPtr biases, MatrixPtr v);
void bpropBiases(MatrixPtr v);
void backward(const UpdateCallback& callback);
void bpropWeights(MatrixPtr v, int inpIdx);
void bpropActs(MatrixPtr v, int inpIdx);
};
} // namespace paddle
@@ -312,6 +312,49 @@ TEST(Layer, convLayer) {
#endif
}
void testConvTransLayer(const string& type, bool trans, bool useGpu) {
TestConfig config;
config.biasSize = 3;
config.layerConfig.set_type(type);
config.layerConfig.set_num_filters(3);
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288});
LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(3);
conv->set_channels(16);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_groups(1);
conv->set_filter_channels(3 / conv->groups());
conv->set_img_size(16);
conv->set_output_x(
(2 * conv->padding() + conv->img_size() - conv->filter_size()) /
((float)conv->stride()) +
1.5);
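// caffe-mode output size: floor((img_size - filter_size + 2 * padding) /
// stride) + 1; the extra 0.5 makes the float-to-int truncation behave
// like rounding. Here: (16 - 2) / 2 + 1 = 8.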
config.layerConfig.set_size(conv->img_size() * conv->img_size() *
config.layerConfig.num_filters());
testLayerGrad(config, "convTrans", 100, trans, useGpu);
}
TEST(Layer, convTransLayer) {
testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false);
/*
#ifndef PADDLE_ONLY_CPU
testConvLayer("exconv", trans= false, useGpu= true);
testConvLayer("cudnn_conv", trans= false, useGpu= true);
#endif
*/
}
TEST(Layer, blockExpandLayer) {
TestConfig config;
config.biasSize = 0;
......