提交 cfc965d5 编写于 作者: Q qingqing01 提交者: GitHub

Merge pull request #269 from wangyang59/deconv

...@@ -5,4 +5,6 @@ build/ ...@@ -5,4 +5,6 @@ build/
.vscode .vscode
.idea .idea
.project .project
.cproject
.pydevproject .pydevproject
Makefile
...@@ -14,12 +14,15 @@ limitations under the License. */ ...@@ -14,12 +14,15 @@ limitations under the License. */
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "ConvBaseLayer.h" #include "ConvBaseLayer.h"
#include "paddle/math/MathUtils.h"
namespace paddle { namespace paddle {
bool ConvBaseLayer::init(const LayerMap& layerMap, bool ConvBaseLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) { const ParameterMap& parameterMap) {
/* Initialize the basic parent class */ /* Initialize the basic parent class */
Layer::init(layerMap, parameterMap); Layer::init(layerMap, parameterMap);
isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv")
? false : true;
/* Initialize the convolutional layer parameter */ /* Initialize the convolutional layer parameter */
numFilters_ = config_.num_filters(); numFilters_ = config_.num_filters();
...@@ -42,8 +45,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, ...@@ -42,8 +45,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
outputW_.push_back(conf.output_x()); outputW_.push_back(conf.output_x());
} }
CHECK(inputLayers_.size() == parameters_.size());
for (size_t i = 0; i < inputLayers_.size(); i++) {
size_t height, width;
height = filterPixels_[i] * filterChannels_[i];
width = (!isDeconv_) ? numFilters_ : channels_[i];
// create a new weight
CHECK_EQ(parameters_[i]->getSize(), width * height);
Weight* w = new Weight(height, width, parameters_[i]);
weights_.emplace_back(w);
}
/* initialize the biases_ */ /* initialize the biases_ */
if (biasParameter_.get() != NULL) { if (biasParameter_.get()) {
if (sharedBiases_) { if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ = biases_ =
...@@ -70,23 +85,48 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -70,23 +85,48 @@ size_t ConvBaseLayer::calOutputSize() {
clearAndReserve(&outputH_); clearAndReserve(&outputH_);
clearAndReserve(&outputW_); clearAndReserve(&outputW_);
size_t layerSize = 0; size_t layerSize = 0;
for (size_t i = 0; i < inputLayers_.size(); i++) {
imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); for (size_t i = 0; i < inputLayers_.size(); i++) {
if (imgSizeH_[i] == 0) inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
if (imgSizeW_[i] == 0) if (isDeconv_) {
imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); if (inH[i] == 0)
outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], inH[i] = config_.inputs(i).conv_conf().output_x();
strideY_[i], caffeMode_)); if (inW[i] == 0)
outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i], inW[i] = config_.inputs(i).conv_conf().output_x();
stride_[i], caffeMode_)); outH.push_back(
CHECK_EQ(outputH_[i], outputH_[0]); imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
CHECK_EQ(outputW_[i], outputW_[0]); caffeMode_));
outW.push_back(
imageSize(inW[i], filterSize_[i], padding_[i], stride_[i],
caffeMode_));
} else {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().img_size();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().img_size();
outH.push_back(
outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
caffeMode_));
outW.push_back(
outputSize(inW[i], filterSize_[i], padding_[i], stride_[i],
caffeMode_));
}
CHECK_EQ(outH[i], outH[0]);
CHECK_EQ(outW[i], outW[0]);
}
getOutput().setFrameHeight(outH[0]);
getOutput().setFrameWidth(outW[0]);
layerSize = outH[0] * outW[0] * size_t(numFilters_);
};
if (isDeconv_) {
setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_);
} else {
setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_);
} }
getOutput().setFrameHeight(outputH_[0]);
getOutput().setFrameWidth(outputW_[0]);
layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_);
return layerSize; return layerSize;
} }
......
...@@ -28,6 +28,9 @@ class ConvBaseLayer : public Layer { ...@@ -28,6 +28,9 @@ class ConvBaseLayer : public Layer {
protected: protected:
typedef std::vector<int> IntV; typedef std::vector<int> IntV;
/// True if it's deconv layer, false if it's convolution layer
bool isDeconv_;
/// The number of filters. /// The number of filters.
int numFilters_; int numFilters_;
/// The x dimension of the padding. /// The x dimension of the padding.
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ExpandConvBaseLayer.h"
#include "paddle/utils/Logging.h"
namespace paddle {
/**
 * Initialize the layer: run the ConvBaseLayer initialization and then derive
 * the per-input matrix-multiplication dimensions (subM_, subN_, subK_) from
 * the convolution config of every input.
 *
 * The class fields channels_ and numFilters_ keep the meaning they have in
 * the config, i.e. channels_ describes the input and numFilters_ the output.
 * For convTrans (isDeconv_ is true) the two roles are swapped here, so that
 * subM_ and subK_ carry the same semantic meaning as for a plain convolution.
 * The other member functions apply the same swap.
 *
 * @param layerMap      forwarded to ConvBaseLayer::init.
 * @param parameterMap  forwarded to ConvBaseLayer::init.
 * @return false if the parent class fails to initialize, true otherwise.
 */
bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
                               const ParameterMap &parameterMap) {
  /* Initialize the basic convolutional parent class; do not continue with a
   * half-initialized parent. */
  if (!ConvBaseLayer::init(layerMap, parameterMap)) {
    return false;
  }

  /* Initialize the projection */
  for (auto &inputConfig : config_.inputs()) {
    const ConvConfig &conf = inputConfig.conv_conf();
    // Number of output maps / input maps as seen by the underlying
    // convolution (swapped for convTrans).
    const int numFilters = isDeconv_ ? conf.channels() : numFilters_;
    const int channel = isDeconv_ ? numFilters_ : conf.channels();

    subM_.push_back(numFilters / conf.groups());
    subN_.push_back(conf.output_x() * conf.output_x());
    subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
                    conf.groups());
    /* Consistent caffe mode for multiple inputs: the value of the last
     * input is the one that remains in caffeMode_. */
    caffeMode_ = conf.caffe_mode();
  }

  // Computes the output geometry and replaces the config-based subN_ values
  // with outputH_ * outputW_.
  getOutputSize();

  return true;
}
/**
 * Recompute the output size through ConvBaseLayer::calOutputSize() and
 * rebuild subN_ so that subN_[i] == outputH_[i] * outputW_[i].
 *
 * @return the layer size reported by ConvBaseLayer::calOutputSize().
 */
size_t ExpandConvBaseLayer::getOutputSize() {
  CHECK_NE(inputLayers_.size(), 0UL);
  const size_t totalSize = ConvBaseLayer::calOutputSize();

  // subN_ depends on the freshly computed output geometry; start over.
  subN_.clear();
  const size_t numInputs = inputLayers_.size();
  for (size_t idx = 0; idx != numInputs; ++idx) {
    subN_.push_back(outputH_[idx] * outputW_[idx]);
  }
  return totalSize;
}
/**
 * Create or resize expandInput_ to a height x width matrix on the device
 * selected by useGpu_.
 *
 * @param height  requested number of rows.
 * @param width   requested number of columns.
 */
void ExpandConvBaseLayer::resetExpandInput(size_t height, size_t width) {
// NOTE(review): the literal `false` is presumably the "transposed" flag of
// Matrix::resizeOrCreate -- confirm against its declaration.
Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}
void ExpandConvBaseLayer::addSharedBias() {
size_t mapW = getOutputSize() / numFilters_;
size_t mapH = getOutputValue()->getElementCnt() / mapW;
MatrixPtr out =
Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_);
Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);
out->transpose(transOutValue_, false); // false means no memory allocation
transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
numFilters_);
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
transOutValue_->addBias(*bias, 1.0f);
transOutValue_->reshape(mapW, mapH);
transOutValue_->transpose(out, false); // false means no memory allocation
out->clear();
bias->clear();
}
void ExpandConvBaseLayer::addUnsharedBias() {
MatrixPtr outValue = getOutputValue();
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
outValue->addBias(*bias, 1.0f);
}
/**
 * Expand one input sample into expandInput_ via Matrix::convExpand().
 *
 * @param image     matrix holding the samples, one sample per row.
 * @param startIdx  row of image to expand.
 * @param inIdx     index of the input whose geometry is used.
 */
void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                         int inIdx) {
  // For convTrans the image being expanded has numFilters_ channels.
  const int channel = isDeconv_ ? numFilters_ : channels_[inIdx];

  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);

  const auto imgH = imgSizeH_[inIdx];
  const auto imgW = imgSizeW_[inIdx];
  const auto filter = filterSize_[inIdx];
  const auto stride = stride_[inIdx];
  const auto pad = padding_[inIdx];

  // Single-row view over the selected sample.
  real *frameData = image->getData() + startIdx * image->getWidth();
  MatrixPtr frame =
      Matrix::create(frameData, 1, imgH * imgW * channel, false, useGpu_);

  // The same filter size, stride and padding are used in both dimensions.
  expandInput_->convExpand(*frame, imgH, imgW, channel, filter, filter,
                           stride, stride, pad, pad, outputH_[inIdx],
                           outputW_[inIdx]);
  frame->clear();
}
/**
 * Expand one input sample and multiply it, group by group, with the
 * (transposed) weight, writing into the frame of `out` that belongs to the
 * same sample.
 *
 * @param image     matrix holding the samples to expand, one per row.
 * @param out       matrix that receives the result.
 * @param inIdx     index of the input (selects weight and geometry).
 * @param startIdx  index of the sample to process.
 */
void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
                                        int inIdx, int startIdx) {
  int subM = subM_[inIdx];
  int subN = subN_[inIdx];
  int subK = subK_[inIdx];

  expandOneFrame(image, startIdx, inIdx);

  // For convTrans the result frame has channels_[inIdx] maps.
  int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;

  // Compute the element offset in size_t: the product of sample index,
  // feature-map size and map count can exceed INT_MAX for large outputs,
  // and signed overflow is undefined behavior.
  const size_t outOffset = static_cast<size_t>(startIdx) * subN * numFilters;
  real *outData = out->getData() + outOffset;

  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
    // NOTE(review): the trailing (1, 1) presumably scale the product and the
    // existing content of C, i.e. the result is accumulated -- confirm
    // against Matrix::mul.
    C->mul(A, B, 1, 1);

    A->clear();
    B->clear();
    C->clear();
    // Advance all three pointers to the next group.
    wgtData += subK * subM;
    expInData += subK * subN;
    outData += subM * subN;
  }
}
/**
 * For every sample: multiply the weight with the corresponding frame of
 * `out` group by group into expandInput_, then fold (convShrink) the
 * expanded buffer into the matching frame of `image`.
 *
 * @param out     matrix read frame by frame (subM x subN per group).
 * @param image   matrix written frame by frame; its height is taken as the
 *                batch size.
 * @param inpIdx  index of the input (selects weight and geometry).
 */
void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
                                    int inpIdx) {
  // For convTrans the image side has numFilters_ channels.
  const int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];

  const int subM = subM_[inpIdx];
  const int subN = subN_[inpIdx];
  const int subK = subK_[inpIdx];
  const auto numGroups = groups_[inpIdx];
  const int imgH = imgSizeH_[inpIdx];
  const int imgW = imgSizeW_[inpIdx];
  const int frameSize = imgH * imgW * channel;
  const size_t numSamples = image->getHeight();

  /* reset the expand-grad memory */
  resetExpandInput(subK * numGroups, subN);

  real *srcData = out->getData();    // walks through `out` across samples
  real *dstData = image->getData();  // walks through `image` across samples
  for (size_t sample = 0; sample < numSamples; ++sample) {
    // Weight and expand buffers are re-used from their start per sample.
    real *wgtData = weights_[inpIdx]->getW()->getData();
    real *expandData = expandInput_->getData();

    for (int g = 0; g < numGroups; ++g) {
      // temporary views for this group
      MatrixPtr C = Matrix::create(expandData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(srcData, subM, subN, false, useGpu_);
      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
      C->mul(A, B);  // mul

      // release the temporary views
      A->clear();
      B->clear();
      C->clear();

      expandData += subK * subN;
      srcData += subM * subN;
      wgtData += subK * subM;
    }

    // shrink the expanded buffer of this sample into one frame of `image`
    MatrixPtr expanded = Matrix::create(expandInput_->getData(),
                                        subK * numGroups, subN, false,
                                        useGpu_);
    MatrixPtr frame = Matrix::create(dstData, 1, frameSize, false, useGpu_);
    // NOTE(review): the trailing (1.0f, 1.0f) look like scale factors that
    // make convShrink add to the existing frame -- confirm against Matrix.
    frame->convShrink(*expanded, imgH, imgW, channel, filterSize_[inpIdx],
                      filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
                      padding_[inpIdx], padding_[inpIdx], outputH_[inpIdx],
                      outputW_[inpIdx], 1.0f, 1.0f);
    frame->clear();
    expanded->clear();

    // move on to the next frame of `image`
    dstData += frameSize;
  }
}
/**
 * Update the weight gradient of one input: every sample of `image` is
 * expanded and multiplied, group by group, with the (transposed) matching
 * frame of `out`; the products go into the weight-gradient matrix.
 *
 * @param image   matrix whose rows are expanded; its height is the batch
 *                size.
 * @param out     matrix read frame by frame (subM x subN per group).
 * @param inpIdx  index of the input (selects weight and geometry).
 */
void ExpandConvBaseLayer::bpropWeights(MatrixPtr image, MatrixPtr out,
                                       int inpIdx) {
  MatrixPtr wGrad = weights_[inpIdx]->getWGrad();

  const int subM = subM_[inpIdx];
  const int subN = subN_[inpIdx];
  const int subK = subK_[inpIdx];
  const auto numGroups = groups_[inpIdx];
  const size_t numSamples = image->getHeight();

  resetExpandInput(subK * numGroups, subN);

  // Walks through `out` across all samples and groups.
  real *outData = out->getData();
  for (size_t sample = 0; sample < numSamples; ++sample) {  // frame by frame
    // expand the current sample into expandInput_
    expandOneFrame(image, sample, inpIdx);

    // Gradient and expand buffers restart from the beginning per sample.
    real *wGradData = wGrad->getData();
    real *expandData = expandInput_->getData();

    // one multiplication per group
    for (int g = 0; g < numGroups; ++g) {
      MatrixPtr A = Matrix::create(expandData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(outData, subM, subN, true, useGpu_);
      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
      C->mul(A, B, 1, 1);

      A->clear();
      B->clear();
      C->clear();

      outData += subM * subN;
      wGradData += subK * subM;
      expandData += subK * subN;
    }
  }
}
/**
 * Collect the gradient of the shared bias.
 *
 * `v` is viewed as a (numMaps x pixelsPerMap) matrix, transposed into
 * transOutValue_ and reshaped so that it has numFilters_ columns; that
 * matrix is then passed to collectBias() on `biases`.
 *
 * @param biases  matrix that receives the collected bias gradient.
 * @param v       matrix the bias gradient is collected from.
 */
void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
  const size_t pixelsPerMap = getOutputSize() / numFilters_;
  const size_t numMaps = v->getElementCnt() / pixelsPerMap;

  MatrixPtr gradView =
      Matrix::create(v->getData(), numMaps, pixelsPerMap, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, pixelsPerMap, numMaps, false,
                         useGpu_);
  gradView->transpose(transOutValue_, false);  // false: no memory allocation

  const size_t rowsPerFilter = transOutValue_->getElementCnt() / numFilters_;
  transOutValue_->reshape(rowsPerFilter, numFilters_);

  biases->collectBias(*transOutValue_, 1.0f);
}
/**
 * Collect the bias gradient from `v` into the gradient of biases_.
 * Shared biases go through bpropSharedBias(); unshared biases are collected
 * directly from `v`.
 *
 * @param v  matrix the bias gradient is collected from.
 */
void ExpandConvBaseLayer::bpropBiases(MatrixPtr v) {
  // Bias gradient viewed as a single row.
  const auto &biasGrad = biases_->getWGrad();
  MatrixPtr gradRow = Matrix::create(biasGrad->getData(), 1,
                                     biasGrad->getElementCnt(), false,
                                     useGpu_);

  if (!sharedBiases_) {
    gradRow->collectBias(*v, 1.0f);
  } else {
    bpropSharedBias(gradRow, v);
  }

  gradRow->clear();
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h"
#include <vector>
namespace paddle {
/**
 * @brief A subclass of ConvBaseLayer that is a superclass of both
 * ExpandConvLayer and ExpandConvTransLayer.
 *
 * It holds the expand/shrink based building blocks shared by the two
 * layers; each layer combines them in its own forward() and backward().
 */
class ExpandConvBaseLayer : public ConvBaseLayer {
protected:
  /// For expand convolution.
  /// subM_ = numFilters_ / groups_
  /// (for convTrans: channels / groups).
  IntV subM_;
  /// subN_ = outputH_ * outputW_.
  IntV subN_;
  /// subK_ = channels * filter_size * filter_size / groups
  /// (for convTrans: numFilters_ takes the place of channels).
  IntV subK_;

  /* The expandInput_ and transOutValue_ are used for CPU expand conv calc.
   * Expand one sample at a time. shape:
   * (numChannels * filterPixels_, outputSizeH * outputSizeW)
   * */
  MatrixPtr expandInput_;
  /// The transpose of output, which is an auxiliary matrix.
  MatrixPtr transOutValue_;

public:
  explicit ExpandConvBaseLayer(const LayerConfig& config)
      : ConvBaseLayer(config) {}

  ~ExpandConvBaseLayer() {}

  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /**
   * Recompute the output size and refresh subN_.
   */
  size_t getOutputSize();

  /**
   * Create or resize expandInput_.
   */
  void resetExpandInput(size_t height, size_t width);

  /**
   * Add shared bias.
   */
  void addSharedBias();

  /**
   * Add unshared bias.
   */
  void addUnsharedBias();

  /**
   * Expand one input sample.
   */
  void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);

  /**
   * Expand one input sample and perform matrix multiplication.
   */
  void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx);

  /**
   * Collect the shared-bias gradient from v into biases.
   */
  void bpropSharedBias(MatrixPtr biases, MatrixPtr v);

  /**
   * Collect the bias gradient from v into the gradient of biases_.
   */
  void bpropBiases(MatrixPtr v);

  /**
   * Expand image sample by sample and multiply with out to update the
   * weight gradient of input inpIdx.
   */
  void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx);

  /**
   * Multiply the weight with out and shrink the result into image.
   * Parameter order is (out, image), matching the definition and the
   * callers.
   */
  void bpropActs(MatrixPtr out, MatrixPtr image, int inpIdx);
};
} // namespace paddle
...@@ -24,150 +24,29 @@ REGISTER_LAYER(exconv, ExpandConvLayer); ...@@ -24,150 +24,29 @@ REGISTER_LAYER(exconv, ExpandConvLayer);
bool ExpandConvLayer::init(const LayerMap &layerMap, bool ExpandConvLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) { const ParameterMap &parameterMap) {
/* Initialize the basic convolutional parent class */ /* Initialize the basic convolutional parent class */
ConvBaseLayer::init(layerMap, parameterMap); ExpandConvBaseLayer::init(layerMap, parameterMap);
/* Initialize the projection */
for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf();
subM_.push_back(numFilters_ / conf.groups());
subN_.push_back(conf.output_x() * conf.output_x());
subK_.push_back(conf.channels() * conf.filter_size() * conf.filter_size() /
conf.groups());
/* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode();
}
/* initialize the weightList */
CHECK(inputLayers_.size() == parameters_.size());
for (size_t i = 0; i < inputLayers_.size(); i++) {
size_t height, width;
height = filterPixels_[i] * filterChannels_[i];
width = numFilters_;
// create a new weight
CHECK_EQ(parameters_[i]->getSize(), width * height);
Weight* w = new Weight(height, width, parameters_[i]);
weights_.emplace_back(w);
}
return true; return true;
} }
size_t ExpandConvLayer::getOutputSize() {
CHECK_NE(inputLayers_.size(), 0UL);
size_t layerSize = ConvBaseLayer::calOutputSize();
subN_.clear();
for (size_t i = 0; i < inputLayers_.size(); i++) {
subN_.push_back(outputH_[i] * outputW_[i]);
}
return layerSize;
}
void ExpandConvLayer::resetExpandInput(size_t height, size_t width) {
Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}
void ExpandConvLayer::resetConvOutput(size_t batchSize, int inIdx) {
Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx],
false, useGpu_);
}
void ExpandConvLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
int inIdx) {
resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
real *imgData = image->getData() + startIdx * image->getWidth();
MatrixPtr imageTmp = Matrix::create(
imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channels_[inIdx], false,
useGpu_);
expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
channels_[inIdx], filterSize_[inIdx],
filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
padding_[inIdx], padding_[inIdx],
outputH_[inIdx], outputW_[inIdx]);
imageTmp->clear();
}
void ExpandConvLayer::expandFwdOnce(MatrixPtr image, int inIdx, int startIdx) {
int subM = subM_[inIdx];
int subN = subN_[inIdx];
int subK = subK_[inIdx];
expandOneFrame(image, startIdx, inIdx);
real *outData =
getOutputValue()->getData() + startIdx * subN * numFilters_;
real *wgtData = weights_[inIdx]->getW()->getData();
real *expInData = expandInput_->getData();
for (int g = 0; g < groups_[inIdx]; ++g) {
MatrixPtr A =
Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose
MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
C->mul(A, B, 1, 1);
A->clear();
B->clear();
C->clear();
wgtData += subK * subM;
expInData += subK * subN;
outData += subM * subN;
}
}
void ExpandConvLayer::addSharedBias() {
size_t mapW = getOutputValue()->getWidth() / numFilters_;
size_t mapH = getOutputValue()->getElementCnt() / mapW;
MatrixPtr out =
Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_);
Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);
out->transpose(transOutValue_, false); // false means no memory allocation
transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
numFilters_);
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
transOutValue_->addBias(*bias, 1.0f);
transOutValue_->reshape(mapW, mapH);
transOutValue_->transpose(out, false); // false means no memory allocation
out->clear();
bias->clear();
}
void ExpandConvLayer::addUnsharedBias() {
MatrixPtr outValue = getOutputValue();
MatrixPtr bias =
Matrix::create(biases_->getW()->getData(), 1,
biases_->getW()->getElementCnt(), false, useGpu_);
outValue->addBias(*bias, 1.0f);
}
void ExpandConvLayer::forward(PassType passType) { void ExpandConvLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
/* malloc memory for the output_ if necessary */ /* malloc memory for the output_ if necessary */
/* note: one sample correspond to one colum, and the int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
* transOutValue correspond sample to one row */
int batchSize = inputLayers_[0]->getOutputValue()->getWidth();
batchSize = inputLayers_[0]->getOutputValue()->getHeight();
resetOutput(batchSize, getOutputSize()); resetOutput(batchSize, getOutputSize());
MatrixPtr image = nullptr; MatrixPtr image = nullptr;
for (size_t i = 0; i != inputLayers_.size(); ++i) { MatrixPtr outV = getOutputValue();
for (size_t i = 0; i < inputLayers_.size(); ++i) {
LayerPtr prevLayer = getPrev(i); LayerPtr prevLayer = getPrev(i);
image = prevLayer->getOutputValue(); image = prevLayer->getOutputValue();
for (size_t off = 0; off < image->getHeight(); off++) { for (size_t off = 0; off < image->getHeight(); off++) {
REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str()); REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str());
expandFwdOnce(image, i, off); expandFwdOnce(image, outV, i, off);
} }
} }
/* add the bias-vector */ /* add the bias-vector */
if (biases_.get() != NULL) { if (biases_.get()) {
if (sharedBiases_) { if (sharedBiases_) {
addSharedBias(); addSharedBias();
} else { } else {
...@@ -179,29 +58,6 @@ void ExpandConvLayer::forward(PassType passType) { ...@@ -179,29 +58,6 @@ void ExpandConvLayer::forward(PassType passType) {
forwardActivation(); forwardActivation();
} }
void ExpandConvLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
size_t mapW = v->getWidth() / numFilters_;
size_t mapH = v->getElementCnt() / mapW;
MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);
Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);
vTmp->transpose(transOutValue_, false); // false means no memory allocation
vTmp->reshape(transOutValue_->getElementCnt() / numFilters_, numFilters_);
biases->collectBias(*vTmp, 1.0f);
}
void ExpandConvLayer::bpropBiases(MatrixPtr v) {
MatrixPtr biases =
Matrix::create(biases_->getWGrad()->getData(), 1,
biases_->getWGrad()->getElementCnt(), false, useGpu_);
if (sharedBiases_) {
bpropSharedBias(biases, v);
} else {
biases->collectBias(*v, 1.0f);
}
biases->clear();
}
void ExpandConvLayer::backward(const UpdateCallback &callback) { void ExpandConvLayer::backward(const UpdateCallback &callback) {
backwardActivation(); backwardActivation();
...@@ -213,111 +69,18 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) { ...@@ -213,111 +69,18 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) {
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
} }
for (size_t i = 0; i != inputLayers_.size(); ++i) { for (size_t i = 0; i < inputLayers_.size(); ++i) {
/* First, calculate the input layers error */ /* First, calculate the input layers error */
bpropActs(outGrad, i); if (getPrev(i)->getOutputGrad()) {
bpropActs(outGrad, getPrev(i)->getOutputGrad(), i);
}
if (weights_[i]->getWGrad()) { if (weights_[i]->getWGrad()) {
/* Then, calculate the W-gradient for the current layer */ /* Then, calculate the W-gradient for the current layer */
bpropWeights(outGrad, i); bpropWeights(getPrev(i)->getOutputValue(), outGrad, i);
/* Increasing the number of gradient */ /* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback); weights_[i]->getParameterPtr()->incUpdate(callback);
} }
} }
} }
void ExpandConvLayer::bpropWeights(MatrixPtr v, int inpIdx) {
MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();
MatrixPtr inputV = getPrev(inpIdx)->getOutputValue();
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = inputV->getHeight();
resetExpandInput(subK * groups_[inpIdx], subN);
resetConvOutput(batchSize, inpIdx);
real *gradData = v->getData();
for (size_t n = 0; n < batchSize; n++) { // frame by frame
// expand
expandOneFrame(inputV, n, inpIdx);
real *wGradData = weightGrad->getData();
real *expandInData = expandInput_->getData();
// expand-mul one-group by one
for (int g = 0; g < groups_[inpIdx]; g++) {
MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_);
MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
C->mul(A, B, 1, 1);
A->clear();
B->clear();
C->clear();
gradData += subM * subN;
wGradData += subK * subM;
expandInData += subK * subN;
}
}
}
void ExpandConvLayer::bpropActs(MatrixPtr v, int inpIdx) {
LayerPtr prevLayer = getPrev(inpIdx);
if (NULL == prevLayer->getOutputGrad()) {
return;
}
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = v->getHeight();
MatrixPtr tgtGrad = prevLayer->getOutputGrad();
/* reset the expand-grad memory */
resetExpandInput(subK * groups_[inpIdx], subN);
resetConvOutput(batchSize, inpIdx);
real *localGradData = v->getData();
real *tgtGradData = tgtGrad->getData();
for (size_t n = 0; n < batchSize; n++) {
real *wgtData = weights_[inpIdx]->getW()->getData();
real *expandInData = expandInput_->getData();
for (int g = 0; g < groups_[inpIdx]; g++) {
// create temporary matrix
MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
C->mul(A, B); // mul
// clear the temporary matrix
A->clear();
B->clear();
C->clear();
expandInData += subK * subN;
localGradData += subM * subN;
wgtData += subK * subM;
}
// shrink one frame outGrad
MatrixPtr oneGradTmp = Matrix::create(
expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
MatrixPtr vTmp = Matrix::create(
tgtGradData, 1,
imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx], false,
useGpu_);
vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
channels_[inpIdx], filterSize_[inpIdx],
filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
padding_[inpIdx], padding_[inpIdx],
outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
vTmp->clear();
oneGradTmp->clear();
// move the data-pointer
tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx];
}
}
} // namespace paddle } // namespace paddle
...@@ -15,9 +15,9 @@ limitations under the License. */ ...@@ -15,9 +15,9 @@ limitations under the License. */
#pragma once #pragma once
#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include <vector> #include <vector>
#include "ExpandConvBaseLayer.h"
namespace paddle { namespace paddle {
...@@ -28,65 +28,18 @@ namespace paddle { ...@@ -28,65 +28,18 @@ namespace paddle {
* *
* The config file api is img_conv_layer. * The config file api is img_conv_layer.
*/ */
class ExpandConvLayer : public ConvBaseLayer {
protected:
/// For expand convolution.
/// subM_ = numFilters_ / groups_.
IntV subM_;
/// subN_ = outputH_ * outputW_.
IntV subN_;
/// subK_ = channels_ * filterPixels_ * groups_.
IntV subK_;
/// Expand one sample at a time. shape:
/// (numChannels * filterPixels_, outputSizeH * outputSizeW)
MatrixPtr expandInput_;
/// The transpose of output, which is an auxiliary matrix.
MatrixPtr transOutValue_;
class ExpandConvLayer : public ExpandConvBaseLayer {
public: public:
explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} explicit ExpandConvLayer(const LayerConfig& config) :
ExpandConvBaseLayer(config) {}
~ExpandConvLayer() {} ~ExpandConvLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
size_t getOutputSize();
/**
* Create or resize expandInput_.
*/
void resetExpandInput(size_t height, size_t width);
/**
* Create or resize transOutValue_.
*/
void resetConvOutput(size_t batchSize, int inIdx);
/**
* Expand one input sample.
*/
void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);
/**
* Expand one input sample and perform matrix multiplication.
*/
void expandFwdOnce(MatrixPtr image, int inIdx, int startIdx);
/**
* Add shared bias.
*/
void addSharedBias();
/**
* Add unshared bias.
*/
void addUnsharedBias();
void forward(PassType passType); void forward(PassType passType);
void bpropSharedBias(MatrixPtr biases, MatrixPtr v);
void bpropBiases(MatrixPtr v);
void backward(const UpdateCallback& callback); void backward(const UpdateCallback& callback);
void bpropWeights(MatrixPtr v, int inpIdx);
void bpropActs(MatrixPtr v, int inpIdx);
}; };
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "ExpandConvTransLayer.h"
/* The implementation of the convTransLayer is basically a swap of forward and
* backward of the original convLayer.
* The variable naming follows the convention of the convLayer.
* */
namespace paddle {
REGISTER_LAYER(exconvt, ExpandConvTransLayer);
/**
 * Initialize the layer. All the work is done by ExpandConvBaseLayer::init.
 */
bool ExpandConvTransLayer::init(const LayerMap &layerMap,
                                const ParameterMap &parameterMap) {
  /* Initialize the basic convolutional parent class and hand its result
   * back to the caller. */
  return ExpandConvBaseLayer::init(layerMap, parameterMap);
}
/**
 * Forward pass of the convTrans layer.
 *
 * The forward of convTrans is the backward-to-input of a convolution, so
 * every input is pushed through bpropActs() with this layer's output value
 * as the target. Bias and activation are applied afterwards.
 */
void ExpandConvTransLayer::forward(PassType passType) {
  Layer::forward(passType);

  /* malloc memory for the output_ if necessary */
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  resetOutput(batchSize, getOutputSize());

  const size_t numInputs = inputLayers_.size();
  for (size_t idx = 0; idx != numInputs; ++idx) {
    MatrixPtr inputValue = getPrev(idx)->getOutputValue();
    REGISTER_TIMER_INFO("shrinkFwd", getName().c_str());
    bpropActs(inputValue, getOutputValue(), idx);
  }

  /* add the bias-vector */
  if (biases_.get()) {
    if (!sharedBiases_) {
      addUnsharedBias();
    } else {
      addSharedBias();
    }
  }

  /* activation */
  forwardActivation();
}
void ExpandConvTransLayer::backward(const UpdateCallback &callback) {
backwardActivation();
MatrixPtr imageGrad = getOutputGrad();
if (biases_ && biases_->getWGrad()) {
bpropBiases(imageGrad);
/* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback);
}
for (size_t i = 0; i < inputLayers_.size(); ++i) {
/* First, calculate the input layers error */
for (size_t off = 0; off < imageGrad->getHeight(); off++) {
if (getPrev(i)->getOutputGrad()) {
expandFwdOnce(imageGrad, getPrev(i)->getOutputGrad(), i, off);
}
}
if (weights_[i]->getWGrad()) {
/* Then, calculate the W-gradient for the current layer */
bpropWeights(imageGrad, getPrev(i)->getOutputValue(), i);
/* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/math/Matrix.h"
#include <vector>
#include "ExpandConvBaseLayer.h"
namespace paddle {
/**
 * @brief A subclass of the convolution layer.
 * This layer expands the input and uses matrix multiplication to
 * calculate the convolution transpose (deconv) operation.
 *
 * The config file api is img_conv_layer with flag trans=True.
 */
class ExpandConvTransLayer : public ExpandConvBaseLayer {
public:
  explicit ExpandConvTransLayer(const LayerConfig& config)
      : ExpandConvBaseLayer(config) {}

  ~ExpandConvTransLayer() {}

  /// Initialize the layer through ExpandConvBaseLayer::init.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /// Forward pass of the convolution transpose.
  void forward(PassType passType);

  /// Backward pass of the convolution transpose.
  void backward(const UpdateCallback& callback);
};
} // namespace paddle
...@@ -26,6 +26,14 @@ add_unittest_without_exec(test_ActivationGrad ...@@ -26,6 +26,14 @@ add_unittest_without_exec(test_ActivationGrad
TestUtil.cpp) TestUtil.cpp)
add_test(NAME test_ActivationGrad add_test(NAME test_ActivationGrad
COMMAND test_ActivationGrad) COMMAND test_ActivationGrad)
################# test_ConvTrans #######################
# Compiles the conv-transpose layer test together with the shared
# LayerGradUtil/TestUtil helper sources, then registers the resulting
# binary as the CTest case "test_ConvTrans".
add_unittest_without_exec(test_ConvTrans
test_ConvTrans.cpp
LayerGradUtil.cpp
TestUtil.cpp)
add_test(NAME test_ConvTrans
COMMAND test_ConvTrans)
################## test_Evaluator ####################### ################## test_Evaluator #######################
add_unittest(test_Evaluator add_unittest(test_Evaluator
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include "paddle/gserver/layers/DataLayer.h"
#include "ModelConfig.pb.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "paddle/gserver/layers/ExpandConvTransLayer.h"
#include "paddle/math/MathUtils.h"
#include "TestUtil.h"
#include "LayerGradUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
P_DECLARE_bool(use_gpu);
P_DECLARE_int32(gpu_id);
P_DECLARE_double(checkgrad_eps);
P_DECLARE_bool(thread_local_rand_use_global_seed);
P_DECLARE_bool(prev_batch_state);
// Test that the convTrans forward is the same as conv backward
//
// Two layers are built with mirrored geometry and identical weights:
//   * an "exconvt" layer (16 input channels -> 3 filters), and
//   * an "exconv" layer (3 input channels -> 16 filters).
// The input of the exconvt layer is fed as the output gradient of the exconv
// layer; the exconv input gradient must then equal the exconvt output.
TEST(Layer, convTransLayerFwd) {
// Setting up conv-trans layer
TestConfig configt;
configt.biasSize = 3;
configt.layerConfig.set_type("exconvt");
configt.layerConfig.set_num_filters(3);
configt.layerConfig.set_partial_sum(1);
configt.layerConfig.set_shared_biases(true);
// Input size 1024 and parameter size 384 (= 2 * 4 * 3 * 16, i.e.
// filter_size * filter_size_y * filter_channels * channels as set below).
configt.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384});
LayerInputConfig* input = configt.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(4);
conv->set_channels(16);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_groups(1);
// For the transposed layer filter_channels is derived from the number of
// filters (3), not from the input channel count.
conv->set_filter_channels(3 / conv->groups());
conv->set_img_size(16);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
// The layer output is the full image: img_size^2 * num_filters.
configt.layerConfig.set_size(conv->img_size() * conv->img_size() *
configt.layerConfig.num_filters());
configt.layerConfig.set_name("convTrans");
// data layer initialize
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
// NOTE(review): 100 is presumably the batch size -- confirm against
// initDataLayer() in LayerGradUtil.
initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans",
100, false, false);
// test layer initialize
std::vector<ParameterPtr> parameters;
LayerPtr convtLayer;
initTestLayer(configt, &layerMap, &parameters, &convtLayer);
// Zero the bias so the output depends on the filter weights only.
convtLayer->getBiasParameter()->zeroMem();
convtLayer->forward(PASS_GC);
// Setting up conv-layer config
TestConfig config;
config.biasSize = 16;
config.layerConfig.set_type("exconv");
config.layerConfig.set_num_filters(16);
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
// Input size 768 (= 3 * 16 * 16) and the same parameter size (384) as the
// transposed layer, so the weight buffers can be copied one-to-one.
config.inputDefs.push_back({INPUT_DATA, "layer_1", 768, 384});
input = config.layerConfig.add_inputs();
conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(4);
conv->set_channels(3);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
// The regular conv layer outputs output_x^2 * num_filters values.
config.layerConfig.set_size(conv->output_x() * conv->output_x() *
config.layerConfig.num_filters());
config.layerConfig.set_name("conv");
// data layer initialize
std::vector<DataLayerPtr> dataLayers2;
LayerMap layerMap2;
vector<Argument> datas2;
initDataLayer(config, &dataLayers2, &datas2, &layerMap2, "conv",
100, false, false);
// test layer initialize
std::vector<ParameterPtr> parameters2;
LayerPtr convLayer;
initTestLayer(config, &layerMap2, &parameters2, &convLayer);
// Sync convLayer and convtLayer parameter
convLayer->getBiasParameter()->zeroMem();
convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(
*(convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)));
// Set convLayer outputGrad as convTransLayer input value
convLayer->forward(PASS_GC);
convLayer->getOutput().grad->copyFrom(*(dataLayers[0]->getOutputValue()));
// The callback only counts how often each parameter is updated; the counts
// are not inspected afterwards.
vector<int> callbackFlags(parameters2.size(), 0);
auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; };
convLayer->backward(callback);
// Check that the convLayer backward is the same as convTransLayer forward
checkMatrixEqual(convtLayer->getOutputValue(),
dataLayers2[0]->getOutputGrad());
}
// Do one forward pass of convTrans layer and check to see if its output
// matches the given result
void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride,
size_t padding, size_t filter_size, MatrixPtr& result) {
TestConfig configt;
configt.biasSize = 1;
configt.layerConfig.set_type("exconvt");
configt.layerConfig.set_num_filters(1);
configt.layerConfig.set_partial_sum(1);
configt.layerConfig.set_shared_biases(true);
configt.inputDefs.push_back({INPUT_DATA, "layer_0", output_x * output_x,
filter_size * filter_size});
LayerInputConfig* input = configt.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(filter_size);
conv->set_filter_size_y(filter_size);
conv->set_channels(1);
conv->set_padding(padding);
conv->set_padding_y(padding);
conv->set_stride(stride);
conv->set_stride_y(stride);
conv->set_groups(1);
conv->set_filter_channels(1);
conv->set_img_size(imgSize);
conv->set_output_x(output_x);
configt.layerConfig.set_size(conv->img_size() * conv->img_size() *
configt.layerConfig.num_filters());
configt.layerConfig.set_name("convTrans");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans",
1, false, false);
dataLayers[0]->getOutputValue()->zeroMem();
dataLayers[0]->getOutputValue()->add(1.0);
// test layer initialize
std::vector<ParameterPtr> parameters;
LayerPtr convtLayer;
initTestLayer(configt, &layerMap, &parameters, &convtLayer);
convtLayer->getBiasParameter()->zeroMem();
convtLayer->getParameters()[0]->zeroMem();
convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->add(1.0);
convtLayer->forward(PASS_GC);
checkMatrixEqual(convtLayer->getOutputValue(), result);
}
// Checks the conv-trans forward output against hand-computed 5x5 results.
// doOneConvtTest() uses all-ones input and all-ones filter weights, so each
// expected pixel is simply the number of filter placements that cover it.
TEST(Layer, convTransLayerFwd2) {
  MatrixPtr result;
  result = Matrix::create(1, 5 * 5, false, false);
  result->zeroMem();
  result->add(1.0);
  doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 1,
                 /* stride */ 1,
                 /* padding */ 0,
                 /* filter_size */ 5,
                 result);

  // The expected buffers use the matrix scalar type `real` rather than
  // `float`: setData() takes a `real*`, so a `float[]` would not compile in
  // a double-precision build.
  real resultData[] = {1, 2, 2, 2, 1,
                       2, 4, 4, 4, 2,
                       2, 4, 4, 4, 2,
                       2, 4, 4, 4, 2,
                       1, 2, 2, 2, 1};
  result->setData(resultData);
  doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
                 /* stride */ 1,
                 /* padding */ 0,
                 /* filter_size */ 4,
                 result);

  // Same expected pattern as above, reached with stride 2 / padding 1.
  real resultData2[] = {1, 2, 2, 2, 1,
                        2, 4, 4, 4, 2,
                        2, 4, 4, 4, 2,
                        2, 4, 4, 4, 2,
                        1, 2, 2, 2, 1};
  result->setData(resultData2);
  doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
                 /* stride */ 2,
                 /* padding */ 1,
                 /* filter_size */ 5,
                 result);

  real resultData3[] = {1, 1, 2, 1, 1,
                        1, 1, 2, 1, 1,
                        2, 2, 4, 2, 2,
                        1, 1, 2, 1, 1,
                        1, 1, 2, 1, 1};
  result->setData(resultData3);
  doOneConvtTest(/* imgSize */ 5,
                 /* output_x */ 2,
                 /* stride */ 2,
                 /* padding */ 0,
                 /* filter_size */ 3,
                 result);
}
// Test entry point: initializes gtest and Paddle, pins the random seeds so
// runs are reproducible, then executes every registered test.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

  FLAGS_thread_local_rand_use_global_seed = true;
  srand(1);

  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
...@@ -302,6 +302,8 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { ...@@ -302,6 +302,8 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.num_filters()); config.layerConfig.num_filters());
testLayerGrad(config, "conv", 100, trans, useGpu); testLayerGrad(config, "conv", 100, trans, useGpu);
// Use small batch_size and useWeight=true to test biasGrad
testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02);
} }
TEST(Layer, convLayer) { TEST(Layer, convLayer) {
...@@ -312,6 +314,46 @@ TEST(Layer, convLayer) { ...@@ -312,6 +314,46 @@ TEST(Layer, convLayer) {
#endif #endif
} }
void testConvTransLayer(const string& type, bool trans, bool useGpu) {
TestConfig config;
config.biasSize = 3;
config.layerConfig.set_type(type);
config.layerConfig.set_num_filters(3);
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288});
LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(3);
conv->set_channels(16);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_groups(1);
conv->set_filter_channels(3 / conv->groups());
conv->set_img_size(16);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->img_size() * conv->img_size() *
config.layerConfig.num_filters());
testLayerGrad(config, "convTrans", 100, trans, useGpu);
// Use small batch_size and useWeight=true to test biasGrad
testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02);
}
// Runs the "exconvt" gradient check once without GPU and once with it.
TEST(Layer, convTransLayer) {
  const bool deviceModes[] = {false, true};
  for (const bool useGpu : deviceModes) {
    testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
  }
}
TEST(Layer, blockExpandLayer) { TEST(Layer, blockExpandLayer) {
TestConfig config; TestConfig config;
config.biasSize = 0; config.biasSize = 0;
......
...@@ -80,4 +80,17 @@ int outputSize(int imageSize, int filterSize, int padding, int stride, ...@@ -80,4 +80,17 @@ int outputSize(int imageSize, int filterSize, int padding, int stride,
return outputSize; return outputSize;
} }
// Computes the image size that corresponds to `outputSize` for the given
// filter size, padding and stride. Aborts via CHECK_GE if the result would
// be smaller than 1.
int imageSize(int outputSize, int filterSize, int padding, int stride,
              bool caffeMode) {
  // Caffe-mode size; the non-caffe size is this value minus (stride - 1).
  int imageSize = (outputSize - 1) * stride + filterSize - 2 * padding;
  if (!caffeMode) {
    imageSize = imageSize - stride + 1;
  }
  CHECK_GE(imageSize, 1);
  return imageSize;
}
} // namespace paddle } // namespace paddle
...@@ -60,4 +60,11 @@ void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, ...@@ -60,4 +60,11 @@ void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
int outputSize(int imageSize, int filterSize, int padding, int stride, int outputSize(int imageSize, int filterSize, int padding, int stride,
bool caffeMode); bool caffeMode);
/**
 * Calculate image size based on output size and caffeMode_.
 * It is the reverse function of outputSize()
 *
 * @param outputSize  size of the convolution output along one dimension.
 * @param filterSize  filter size along the same dimension.
 * @param padding     padding applied on each side.
 * @param stride      convolution stride.
 * @param caffeMode   selects which of the two size formulas is inverted.
 * @return the corresponding image size along that dimension.
 */
int imageSize(int outputSize, int filterSize, int padding, int stride,
bool caffeMode);
} // namespace paddle } // namespace paddle
...@@ -649,7 +649,8 @@ class ConvProjection(Projection): ...@@ -649,7 +649,8 @@ class ConvProjection(Projection):
parse_conv(conv_conf, parse_conv(conv_conf,
input_layer_name, input_layer_name,
self.proj_conf.conv_conf) self.proj_conf.conv_conf,
num_filters)
# TODO: support rectangle input # TODO: support rectangle input
self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters
...@@ -730,7 +731,8 @@ class ConvOperator(Operator): ...@@ -730,7 +731,8 @@ class ConvOperator(Operator):
parse_conv(conv_conf, parse_conv(conv_conf,
MakeLayerNameInSubmodel(input_layer_names[0]), MakeLayerNameInSubmodel(input_layer_names[0]),
self.operator_conf.conv_conf) self.operator_conf.conv_conf,
num_filters)
self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters
config_assert(len(input_layer_names) == 2, "Conv is binary operator") config_assert(len(input_layer_names) == 2, "Conv is binary operator")
...@@ -1017,6 +1019,17 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): ...@@ -1017,6 +1019,17 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
else: else:
return 1 + int(math.ceil(output)) return 1 + int(math.ceil(output))
def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
    '''
    Calculate image_size based on output_size for convolution.
    It is the reverse function of cnn_output_size.

    :param output_size: size of the convolution output along one dimension.
    :param filter_size: filter size along the same dimension.
    :param padding: padding applied on each side.
    :param stride: convolution stride.
    :param caffe_mode: selects which of the two size formulas is inverted.
    :return: the corresponding image size along that dimension.
    '''
    if caffe_mode:
        img_size = (output_size - 1) * stride + filter_size - 2 * padding
    else:
        img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1
    return img_size
def parse_pool(pool, input_layer_name, pool_conf): def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.pool_type = pool.pool_type pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in ['max-projection', 'avg-projection', config_assert(pool.pool_type in ['max-projection', 'avg-projection',
...@@ -1082,7 +1095,11 @@ def parse_norm(norm, input_layer_name, norm_conf): ...@@ -1082,7 +1095,11 @@ def parse_norm(norm, input_layer_name, norm_conf):
else: else:
norm_conf.scale /= norm.size ** 2 norm_conf.scale /= norm.size ** 2
def parse_conv(conv, input_layer_name, conv_conf): '''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.filter_size = conv.filter_size conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y conv_conf.filter_size_y = conv.filter_size_y
conv_conf.channels = conv.channels conv_conf.channels = conv.channels
...@@ -1091,20 +1108,37 @@ def parse_conv(conv, input_layer_name, conv_conf): ...@@ -1091,20 +1108,37 @@ def parse_conv(conv, input_layer_name, conv_conf):
conv_conf.stride = conv.stride conv_conf.stride = conv.stride
conv_conf.stride_y = conv.stride_y conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups conv_conf.groups = conv.groups
conv_conf.filter_channels = conv.channels / conv.groups
conv_conf.caffe_mode = conv.caffe_mode conv_conf.caffe_mode = conv.caffe_mode
img_pixels = g_layer_map[input_layer_name].size / conv.channels if not trans:
print('channels=%d size=%d'%(conv.channels, conv_conf.filter_channels = conv.channels / conv.groups
g_layer_map[input_layer_name].size))
conv_conf.img_size = int(img_pixels ** 0.5) img_pixels = g_layer_map[input_layer_name].size / conv.channels
config_assert((conv_conf.img_size ** 2) == img_pixels, print('channels=%d size=%d'%(conv.channels,
("Input layer %s: Incorrect input image size %d for input " g_layer_map[input_layer_name].size))
+ "image pixels %d") conv_conf.img_size = int(img_pixels ** 0.5)
% (input_layer_name, conv_conf.img_size, img_pixels)) config_assert((conv_conf.img_size ** 2) == img_pixels,
conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, ("Input layer %s: Incorrect input image size %d for input "
conv_conf.padding, conv_conf.stride, + "image pixels %d")
conv_conf.caffe_mode) % (input_layer_name, conv_conf.img_size, img_pixels))
conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size,
conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode)
else:
conv_conf.filter_channels = num_filters / conv.groups
outputSize = g_layer_map[input_layer_name].size / conv.channels
print('channels=%d size=%d'%(conv.channels,
g_layer_map[input_layer_name].size))
conv_conf.output_x = int(outputSize ** 0.5)
config_assert((conv_conf.output_x ** 2) == outputSize,
("Input layer %s: Incorrect input image size %d for input "
+ "image pixels %d")
% (input_layer_name, conv_conf.output_x, outputSize))
conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size,
conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode)
def parse_block_expand(block_expand, input_layer_name, block_expand_conf): def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels block_expand_conf.channels = block_expand.channels
...@@ -1587,7 +1621,8 @@ class ConvLayerBase(LayerBase): ...@@ -1587,7 +1621,8 @@ class ConvLayerBase(LayerBase):
parse_conv( parse_conv(
self.inputs[input_index].conv, self.inputs[input_index].conv,
input_layer.name, input_layer.name,
self.config.inputs[input_index].conv_conf) self.config.inputs[input_index].conv_conf,
num_filters)
conv_conf = self.config.inputs[input_index].conv_conf conv_conf = self.config.inputs[input_index].conv_conf
psize = self.calc_parameter_size(conv_conf) psize = self.calc_parameter_size(conv_conf)
print("output size for %s is %d " % (name, conv_conf.output_x)) print("output size for %s is %d " % (name, conv_conf.output_x))
...@@ -1612,6 +1647,63 @@ class ConvLayer(ConvLayerBase): ...@@ -1612,6 +1647,63 @@ class ConvLayer(ConvLayerBase):
class ConvLayer(ConvLayerBase): class ConvLayer(ConvLayerBase):
layer_type = 'cudnn_conv' layer_type = 'cudnn_conv'
@config_layer('convt')
class ConvTransLayerBase(LayerBase):
    '''
    Config-side definition of the transposed convolution (deconv) layer.

    Whatever type is requested, the emitted layer type is always "exconvt",
    because cudnn_convt has not been implemented.
    '''
    layer_type = 'convt'

    def __init__(
            self,
            name,
            inputs=[],
            bias=True,
            num_filters=None,
            shared_biases=False,
            **xargs):
        super(ConvTransLayerBase, self).__init__(
            name, self.layer_type, 0, inputs=inputs, **xargs)

        if num_filters is not None:
            self.config.num_filters = num_filters

        # cudnn_convt has not been implemented so use exconvt only
        self.layer_type = "exconvt"
        # need to specify layer in config
        self.config.type = self.layer_type

        if shared_biases is not None:
            self.config.shared_biases = shared_biases

        for input_index in xrange(len(self.inputs)):
            input_layer = self.get_input_layer(input_index)
            # trans=True makes parse_conv treat the input layer as the conv
            # "output" and derive img_size from it.
            parse_conv(
                self.inputs[input_index].conv,
                input_layer.name,
                self.config.inputs[input_index].conv_conf,
                num_filters,
                trans=True)
            conv_conf = self.config.inputs[input_index].conv_conf
            psize = self.calc_parameter_size(conv_conf)
            print("output size for %s is %d " % (name, conv_conf.output_x))
            self.create_input_parameter(input_index, psize)
            # The layer emits the full image: img_size^2 per filter.
            self.set_layer_size(
                (conv_conf.img_size ** 2) * self.config.num_filters)

        # One bias per output value, or one per filter when biases are shared.
        psize = self.config.size
        if shared_biases:
            psize = self.config.num_filters
        self.create_bias_parameter(bias, psize, [psize, 1])

    def calc_parameter_size(self, conv_conf):
        '''Number of weights: channels * filter_channels * filter area.'''
        return conv_conf.channels * conv_conf.filter_channels \
            * (conv_conf.filter_size * conv_conf.filter_size_y)
# Registers the transposed convolution layer under the 'exconvt' type name;
# all behavior is inherited from ConvTransLayerBase.
@config_layer('exconvt')
class ConvTransLayer(ConvTransLayerBase):
layer_type = 'exconvt'
@config_layer('norm') @config_layer('norm')
class NormLayer(LayerBase): class NormLayer(LayerBase):
def __init__( def __init__(
......
...@@ -78,6 +78,7 @@ class LayerType(object): ...@@ -78,6 +78,7 @@ class LayerType(object):
COSINE_SIM = 'cos' COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid' HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv" CONV_LAYER = "conv"
CONVTRANS_LAYER = "convt"
POOL_LAYER = "pool" POOL_LAYER = "pool"
BATCH_NORM_LAYER = 'batch_norm' BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm' NORM_LAYER = 'norm'
...@@ -1517,7 +1518,8 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1517,7 +1518,8 @@ def img_conv_layer(input, filter_size, num_filters,
name=None, num_channels=None, name=None, num_channels=None,
act=None, groups=1, stride=1, padding=0, bias_attr=None, act=None, groups=1, stride=1, padding=0, bias_attr=None,
param_attr=None, shared_biases=True, layer_attr=None, param_attr=None, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None): filter_size_y=None, stride_y=None, padding_y=None,
trans=False):
""" """
Convolution layer for image. Paddle only support square input currently and Convolution layer for image. Paddle only support square input currently and
thus input image's width equals height. thus input image's width equals height.
...@@ -1525,7 +1527,14 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1525,7 +1527,14 @@ def img_conv_layer(input, filter_size, num_filters,
The details of convolution layer, please refer UFLDL's `convolution The details of convolution layer, please refer UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/ <http://ufldl.stanford.edu/tutorial/supervised/
FeatureExtractionUsingConvolution/>`_ . FeatureExtractionUsingConvolution/>`_ .
Convolution Transpose (deconv) layer for image. Paddle only supports square
input currently and thus input image's width equals height.
For the details of the convolution transpose layer, please refer to
`this explanation and the references therein
<http://datascience.stackexchange.com/questions/6107/
what-are-deconvolutional-layers/>`_ .
The num_channel means input image's channel number. It may be 1 or 3 when The num_channel means input image's channel number. It may be 1 or 3 when
input is raw pixels of image(mono or RGB), or it may be the previous layer's input is raw pixels of image(mono or RGB), or it may be the previous layer's
num_filters * num_group. num_filters * num_group.
...@@ -1575,6 +1584,8 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1575,6 +1584,8 @@ def img_conv_layer(input, filter_size, num_filters,
:type shared_biases: bool :type shared_biases: bool
:param layer_attr: Layer Extra Attribute. :param layer_attr: Layer Extra Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param trans: true if it is a convTransLayer, false if it is a convLayer
:type trans: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1610,6 +1621,9 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1610,6 +1621,9 @@ def img_conv_layer(input, filter_size, num_filters,
param_attr.attr["initial_std"] = init_w param_attr.attr["initial_std"] = init_w
param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_strategy"] = 0
param_attr.attr["initial_smart"] = False param_attr.attr["initial_smart"] = False
lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER
Layer( Layer(
name=name, name=name,
inputs=Input(input.name, conv=Conv( inputs=Input(input.name, conv=Conv(
...@@ -1622,10 +1636,10 @@ def img_conv_layer(input, filter_size, num_filters, ...@@ -1622,10 +1636,10 @@ def img_conv_layer(input, filter_size, num_filters,
num_filters=num_filters, num_filters=num_filters,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
shared_biases=shared_biases, shared_biases=shared_biases,
type=LayerType.CONV_LAYER, type=lt,
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr)
) )
return LayerOutput(name, LayerType.CONV_LAYER, parents=[input], return LayerOutput(name, lt, parents=[input],
activation=act, num_filters=num_filters) activation=act, num_filters=num_filters)
......
...@@ -9,7 +9,7 @@ protostr=$PWD/protostr ...@@ -9,7 +9,7 @@ protostr=$PWD/protostr
configs=(test_fc layer_activations projections test_print_layer configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
test_maxout test_bi_grumemory math_ops) test_maxout test_bi_grumemory math_ops)
......
# Config used to generate the golden protostr for the transposed-convolution
# path of img_conv_layer (trans=True), followed by batch-norm, cross-map
# normalization and max pooling.
from paddle.trainer_config_helpers import *
settings(
learning_rate=1e-3,
batch_size=1000
)
# Single-channel 227x227 input.
img = data_layer(name='image', size=227*227)
# the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size.
# trans=True requests the conv-transpose layer instead of a regular conv.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
act=LinearActivation(), trans=True)
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32)
# Pooling reads the conv-transpose output directly, not the normalized one.
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
type: "nn"
layers {
name: "image"
type: "data"
size: 51529
active_type: ""
}
layers {
name: "__conv_0__"
type: "exconvt"
size: 4194304
active_type: ""
inputs {
input_layer_name: "image"
input_parameter_name: "___conv_0__.w0"
conv_conf {
filter_size: 32
channels: 1
stride: 1
padding: 1
groups: 1
filter_channels: 64
output_x: 227
img_size: 256
caffe_mode: true
filter_size_y: 32
padding_y: 1
stride_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 64
shared_biases: true
}
layers {
name: "__batch_norm_0__"
type: "batch_norm"
size: 4194304
active_type: "relu"
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w0"
image_conf {
channels: 64
img_size: 256
}
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w1"
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w2"
}
bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9
}
layers {
name: "__crmnorm_0__"
type: "norm"
size: 4194304
active_type: ""
inputs {
input_layer_name: "__batch_norm_0__"
norm_conf {
norm_type: "cmrnorm-projection"
channels: 64
size: 32
scale: 0.0004
pow: 0.75
output_x: 256
img_size: 256
blocked: false
}
}
}
layers {
name: "__pool_0__"
type: "pool"
size: 3240000
active_type: ""
inputs {
input_layer_name: "__conv_0__"
pool_conf {
pool_type: "max-projection"
channels: 64
size_x: 32
stride: 1
output_x: 225
img_size: 256
padding: 0
size_y: 32
stride_y: 1
output_y: 225
img_size_y: 256
padding_y: 0
}
}
}
parameters {
name: "___conv_0__.w0"
size: 65536
initial_mean: 0.0
initial_std: 0.0441941738242
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___conv_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 64
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w0"
size: 64
initial_mean: 1.0
initial_std: 0.0
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w1"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.w2"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
}
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
sub_models {
name: "root"
layer_names: "image"
layer_names: "__conv_0__"
layer_names: "__batch_norm_0__"
layer_names: "__crmnorm_0__"
layer_names: "__pool_0__"
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
is_recurrent_layer_group: false
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册