From e802471c58068669de78a3eaec143b705cb654b8 Mon Sep 17 00:00:00 2001 From: luotao1 Date: Mon, 7 Nov 2016 19:41:20 +0800 Subject: [PATCH] abstract outputSize function in CNN-related layers (#314) --- paddle/gserver/layers/ConvBaseLayer.cpp | 9 ++- paddle/gserver/layers/ConvBaseLayer.h | 26 +------ paddle/gserver/layers/ConvOperator.cpp | 72 ++++++------------- paddle/gserver/layers/ConvProjection.h | 12 ++-- paddle/gserver/layers/CudnnPoolLayer.cpp | 20 +++--- paddle/gserver/layers/PoolLayer.h | 11 +-- paddle/gserver/layers/PoolProjectionLayer.cpp | 29 ++++---- paddle/gserver/tests/test_LayerGrad.cpp | 56 +++++++-------- paddle/math/MathUtils.cpp | 19 +++-- paddle/math/MathUtils.h | 16 +++++ python/paddle/trainer/config_parser.py | 53 +++++++------- 11 files changed, 139 insertions(+), 184 deletions(-) diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 040510b7ad..42ff0b70d8 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/utils/Logging.h" #include "ConvBaseLayer.h" namespace paddle { @@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() { imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); if (imgSizeW_[i] == 0) imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); - outputH_.push_back( - outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); - outputW_.push_back( - outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i])); + outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], + strideY_[i], caffeMode_)); + outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i], + stride_[i], caffeMode_)); CHECK_EQ(outputH_[i], outputH_[0]); CHECK_EQ(outputW_[i], outputW_[0]); } diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 316514acf1..e660a6d6f5 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -16,6 +16,7 @@ limitations under the License. */ #pragma once #include "Layer.h" +#include "paddle/math/MathUtils.h" namespace paddle { /** @@ -87,31 +88,6 @@ public: virtual size_t calOutputSize(); Weight& getWeight(int idx) { return *weights_[idx]; } - - /** - * Calculate output size based on caffeMode_. - * - input(+padding): 0123456789 - * - imageSize(+padding) = 10; - * - filterSize = 3; - * - stride = 2; - * - caffeMode_ is true: - - output: (012), (234), (456), (678) - - outputSize = 4; - * - caffeMode_ is false: - * - output: (012), (234), (456), (678), (9) - * - outputSize = 5; - */ - int outputSize(int imageSize, int filterSize, int padding, int stride) { - int outputSize; - if (!caffeMode_) { - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - CHECK_GE(outputSize, 1); - return outputSize; - } }; } // namespace paddle diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp index 8c72c17784..2d9c892fe5 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/gserver/layers/ConvOperator.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" #include "Operator.h" namespace paddle { @@ -35,8 +35,8 @@ public: */ virtual ~ConvOperator() { if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - workSpaceInBytes_ = 0; + hl_free_mem_device(workSpace_); + workSpaceInBytes_ = 0; } hl_destroy_tensor_descriptor(inputDesc_); @@ -83,33 +83,6 @@ private: filterSize_ * filterSizeY_ * channels_ * numFilters_); } - /** - * Calculate output size. - */ - int outputSize(int imageSize, int filterSize, int padding, int stride) { - int outputSize; - if (!caffeMode_) { - /* input(+padding): 0123456789 - * imageSize(+padding) = 10; - * filterSize = 3; - * stride = 2; - * output: (012), (234), (456), (678), (9) - * outputSize = 5; - */ - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - /* input(+padding): 0123456789 - * imageSize(+padding) = 10; - * filterSize = 3; - * stride = 2; - * output: (012), (234), (456), (678) - * outputSize = 4; - */ - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - return outputSize; - } /// Most of member variables are same with CudnnConvLayer. /// There is no explanation here. int imageH_, imageW_, outputH_, outputW_; @@ -129,7 +102,7 @@ private: int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; size_t workSpaceInBytes_; - void* workSpace_; + void *workSpace_; bool isSelectAlgo_; }; @@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu) void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { if (maxWorkSpace > workSpaceInBytes_) { if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); + hl_free_mem_device(workSpace_); } // total amount of storage needed workSpace_ = hl_malloc_device(maxWorkSpace); @@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { } } - void ConvOperator::reshape(int batchSize) { imageH_ = ins_[0]->getFrameHeight(); imageW_ = ins_[0]->getFrameWidth(); if (imageH_ == 0) imageH_ = imgSize_; if (imageW_ == 0) imageW_ = imgSize_; - outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_); - outputW_ = outputSize(imageW_, filterSize_, padding_, stride_); + outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); + outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); out_->setFrameHeight(outputH_); out_->setFrameWidth(outputW_); @@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) { reshapeImageDescriptors(); if (!isSelectAlgo_) { - hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, - convDesc_, &fwdAlgo_, &fwdLimitBytes_, - &bwdDataAlgo_, &bwdDataLimitBytes_, - &bwdFilterAlgo_, &bwdFilterLimitBytes_); + hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, convDesc_, + &fwdAlgo_, &fwdLimitBytes_, &bwdDataAlgo_, + &bwdDataLimitBytes_, &bwdFilterAlgo_, + &bwdFilterLimitBytes_); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); @@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() { hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_); hl_create_tensor_descriptor(&inputDesc_); - int outputX = outputSize(imgSize_, filterSize_, padding_, stride_); + int outputX = + outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_); CHECK_EQ(outputX, outputX_); hl_create_tensor_descriptor(&outputDesc_); hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_, @@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() { void ConvOperator::reshapeImageDescriptors() { hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_, - channels_ * imageH_ * imageW_, imageH_ * imageW_, - imageW_, 1); + channels_ * imageH_ * imageW_, imageH_ * imageW_, imageW_, + 1); hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_, numFilters_ * outputH_ * outputW_, outputH_ * outputW_, outputW_, 1); - hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, - paddingY_, padding_, strideY_, stride_); + hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, paddingY_, + padding_, strideY_, stride_); inputOffset_ = channels_ * imageH_ * imageW_; outputOffset_ = numFilters_ * outputH_ * outputW_; weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_; @@ -273,18 +246,17 @@ void ConvOperator::backward() { real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_, outGrad, filterDesc_, weightGrad, - convDesc_, workSpace_, - workSpaceInBytes_, bwdFilterAlgo_); + convDesc_, workSpace_, workSpaceInBytes_, + bwdFilterAlgo_); } MatrixPtr preGrad = ins_[0]->grad; if (NULL != preGrad) { real *inputGrad = preGrad->getData() + inputOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - hl_convolution_backward_data(inputDesc_, inputGrad, outputDesc_, - outGrad, filterDesc_, wgtData, - convDesc_, workSpace_, - workSpaceInBytes_, bwdDataAlgo_); + hl_convolution_backward_data( + inputDesc_, inputGrad, outputDesc_, outGrad, filterDesc_, wgtData, + convDesc_, workSpace_, workSpaceInBytes_, bwdDataAlgo_); } } } diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/gserver/layers/ConvProjection.h index 41a100ac3c..d0bfe9a6ed 100644 --- a/paddle/gserver/layers/ConvProjection.h +++ b/paddle/gserver/layers/ConvProjection.h @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once #include "Projection.h" +#include "paddle/math/MathUtils.h" namespace paddle { @@ -42,17 +42,15 @@ protected: void reshapeTensorDesc(int batchSize); void reshape(int batchSize); - int outputSize(int imageSize, int filterSize, int padding, int stride) { - return (imageSize - filterSize + 2 * padding) / stride + 1; - } - size_t calOutputSize() { imageH_ = in_->getFrameHeight(); imageW_ = in_->getFrameWidth(); if (imageH_ == 0) imageH_ = configImgH_; if (imageW_ == 0) imageW_ = configImgW_; - outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_); - outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_); + outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_, + /* caffeMode */ true); + outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_, + /* caffeMode */ true); const_cast(out_)->setFrameHeight(outputH_); const_cast(out_)->setFrameWidth(outputW_); diff --git a/paddle/gserver/layers/CudnnPoolLayer.cpp b/paddle/gserver/layers/CudnnPoolLayer.cpp index 4c733591b3..24adb50a98 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.cpp +++ b/paddle/gserver/layers/CudnnPoolLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" #include "paddle/math/Matrix.h" @@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap, strideHeight = strideY_; strideWidth = stride_; - hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, - windowWidth, heightPadding, widthPadding, - strideHeight, strideWidth); + hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, windowWidth, + heightPadding, widthPadding, strideHeight, + strideWidth); return true; } @@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) { } CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(), channels_ * imageH_ * imageW_); - outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_); - outputW_ = outputSize(imageW_, sizeX_, confPadding_, stride_); + outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_, + /* caffeMode */ false); + outputW_ = + outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ false); getOutput().setFrameHeight(outputH_); getOutput().setFrameWidth(outputW_); @@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) { real *inputData = getInputValue(0)->getData(); real *outData = getOutputValue()->getData(); - hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, - poolingDesc_); + hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_); } void CudnnPoolLayer::backward(const UpdateCallback &callback) { @@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) { real *inputGrad = getInputGrad(0)->getData(); real *outData = getOutputValue()->getData(); real *outGrad = getOutputGrad()->getData(); - hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, - outData, outGrad, poolingDesc_); + hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, outData, + outGrad, poolingDesc_); } CudnnPoolLayer::~CudnnPoolLayer() { diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index bde1f5b8dc..e87ad08251 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -17,6 +17,7 @@ limitations under the License. */ #include "Layer.h" #include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" #include namespace paddle { @@ -47,16 +48,6 @@ public: static Layer* create(const LayerConfig& config); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - /** - * Calculate output size according window size and padding size. - */ - int outputSize(int imageSize, int windowSize, int padding, int stride) { - int outputSize; - outputSize = - (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1; - return outputSize; - } }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/gserver/layers/PoolProjectionLayer.cpp index 5a2e9afb6e..9e8ce77850 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.cpp +++ b/paddle/gserver/layers/PoolProjectionLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" #include "PoolProjectionLayer.h" @@ -31,8 +30,10 @@ size_t PoolProjectionLayer::getSize() { imgSizeW_ = imgSize_; } - outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_); - outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_); + outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_, + /* caffeMode */ false); + outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_, + /* caffeMode */ false); layerSize = outputH_ * outputW_ * channels_; @@ -53,9 +54,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) { MatrixPtr outV = getOutputValue(); - outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, - sizeX_, sizeY_, strideY_, stride_, - outputH_, outputW_, confPaddingY_, confPadding_); + outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_, + strideY_, stride_, outputH_, outputW_, confPaddingY_, + confPadding_); } void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { @@ -72,9 +73,8 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { MatrixPtr inputGrad = getInputGrad(0); inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, - sizeX_, sizeY_, - strideY_, stride_, outputH_, outputW_, 1, 1, - confPaddingY_, confPadding_); + sizeX_, sizeY_, strideY_, stride_, outputH_, + outputW_, 1, 1, confPaddingY_, confPadding_); } void AvgPoolProjectionLayer::forward(PassType passType) { @@ -89,9 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) { MatrixPtr outV = getOutputValue(); - outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, - sizeX_, sizeY_, strideY_, stride_, - outputH_, outputW_, confPaddingY_, confPadding_); + outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_, + strideY_, stride_, outputH_, outputW_, confPaddingY_, + confPadding_); } void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { @@ -103,9 +103,8 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { /* Do derivation */ MatrixPtr outputGrad = getOutputGrad(); MatrixPtr inputGrad = getInputGrad(0); - inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, - sizeX_, sizeY_, strideY_, stride_, - outputH_, outputW_, 1, 1, + inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_, + strideY_, stride_, outputH_, outputW_, 1, 1, confPaddingY_, confPadding_); } } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index bf2c2e0499..5397b952bc 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/gserver/layers/DataLayer.h" #include "ModelConfig.pb.h" #include "paddle/trainer/Trainer.h" +#include "paddle/math/MathUtils.h" #include "TestUtil.h" #include "LayerGradUtil.h" @@ -134,7 +135,6 @@ TEST(Projection, identity) { } } - #ifndef PADDLE_ONLY_CPU TEST(Projection, conv) { const int NUM_FILTERS = 16; @@ -158,21 +158,23 @@ TEST(Projection, conv) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); - int outputSize = (2 * conv->padding() + conv->img_size() - - conv->filter_size()) / conv->stride() + 1; - int outputSizeY = (2 * conv->padding_y() + conv->img_size() - - conv->filter_size_y()) / conv->stride_y() + 1; - conv->set_output_x(outputSize); + int output_x = + outputSize(conv->img_size(), conv->filter_size(), conv->padding(), + conv->stride(), /* caffeMode */ true); + int output_y = + outputSize(conv->img_size(), conv->filter_size_y(), conv->padding_y(), + conv->stride_y(), /* caffeMode */ true); + conv->set_output_x(output_x); conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(outputSize * outputSizeY * NUM_FILTERS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); - testProjectionGrad(conf, INPUT_DATA, + testProjectionGrad( + conf, INPUT_DATA, /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y, /* batchSize */ 100, true, false, NUM_FILTERS, true); } #endif - TEST(Layer, concat) { TestConfig config; config.biasSize = 0; @@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); - conv->set_output_x( - (2 * conv->padding() + conv->img_size() - conv->filter_size()) / - ((float)conv->stride()) + - 1.5); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); config.layerConfig.set_size(conv->output_x() * conv->output_x() * config.layerConfig.num_filters()); @@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) { blockExpand->set_stride_x(2); blockExpand->set_stride_y(2); blockExpand->set_output_x( - 1 + - (2 * blockExpand->padding_x() + blockExpand->img_size_x() - - blockExpand->block_x() + blockExpand->stride_x() - 1) / - blockExpand->stride_x()); + outputSize(blockExpand->img_size_x(), blockExpand->block_x(), + blockExpand->padding_x(), blockExpand->stride_x(), + /* caffeMode */ false)); blockExpand->set_output_y( - 1 + - (2 * blockExpand->padding_y() + blockExpand->img_size_y() - - blockExpand->block_y() + blockExpand->stride_y() - 1) / - blockExpand->stride_y()); + outputSize(blockExpand->img_size_y(), blockExpand->block_y(), + blockExpand->padding_y(), blockExpand->stride_y(), + /* caffeMode */ false)); config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * blockExpand->channels()); @@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool, pool->set_stride(sw); pool->set_stride_y(sh); - int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1; - int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1; + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); pool->set_output_x(ow); pool->set_output_y(oh); } @@ -1255,12 +1254,11 @@ TEST(Operator, conv) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); - int outputSize = - int(1.0 * (2 * conv->padding() + conv->img_size() - conv->filter_size()) / - conv->stride()) + - 1; - conv->set_output_x(outputSize); - config.layerConfig.set_size(outputSize * outputSize * + int output_x = + outputSize(conv->img_size(), conv->filter_size(), conv->padding(), + conv->stride(), /* caffeMode */ true); + conv->set_output_x(output_x); + config.layerConfig.set_size(output_x * output_x * config.layerConfig.num_filters()); config.layerConfig.set_size(conv->output_x() * conv->output_x() * NUM_FILTERS); diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp index 5b78ab1b07..c1af8628d0 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/math/MathUtils.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "MathUtils.h" #include #include "paddle/utils/Logging.h" @@ -24,11 +23,7 @@ namespace paddle { * major is rows and minor is cols, according to * major value to initialize minor value" */ -void sparseRand(int* major, - int* minor, - int nnz, - int majorLen, - int minorMax, +void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu) { CHECK(size_t(nnz) > size_t(1)); int* cpuMajor; @@ -72,5 +67,17 @@ void sparseRand(int* major, } } +int outputSize(int imageSize, int filterSize, int padding, int stride, + bool caffeMode) { + int outputSize; + if (!caffeMode) { + outputSize = + (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; + } else { + outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; + } + CHECK_GE(outputSize, 1); + return outputSize; +} } // namespace paddle diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h index 83375022ab..49d0c10a8f 100644 --- a/paddle/math/MathUtils.h +++ b/paddle/math/MathUtils.h @@ -44,4 +44,20 @@ namespace paddle { void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu); +/** + * Calculate output size based on caffeMode_. + * - input(+padding): 0123456789 + * - imageSize(+padding) = 10; + * - filterSize = 3; + * - stride = 2; + * - caffeMode is true: + - output: (012), (234), (456), (678) + - outputSize = 4; + * - caffeMode is false: + * - output: (012), (234), (456), (678), (9) + * - outputSize = 5; + */ +int outputSize(int imageSize, int filterSize, int padding, int stride, + bool caffeMode); + } // namespace paddle diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index e9038fea8a..73631602a9 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1006,6 +1006,17 @@ def TestData(data_config, async_load_data=None): " Data definition") g_config.test_data_config.async_load_data = async_load_data +''' +caffe_mode: compute the output size using floor instead of ceil, + which is consistent of caffe and CuDNN's convention. +''' +def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): + output = (2 * padding + img_size - filter_size) / float(stride) + if caffe_mode: + return 1 + int(math.floor(output)) + else: + return 1 + int(math.ceil(output)) + def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in ['max-projection', 'avg-projection', @@ -1036,12 +1047,10 @@ def parse_pool(pool, input_layer_name, pool_conf): if pool.padding is not None: pool_conf.padding = pool.padding pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ - 2*pool_conf.padding - pool_conf.size_x) / \ - float(pool_conf.stride))) + 1 - pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ - 2*pool_conf.padding_y - pool_conf.size_y) / \ - float(pool_conf.stride_y))) + 1 + pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, + pool_conf.padding, pool_conf.stride, False) + pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, + pool_conf.padding_y, pool_conf.stride_y, False) def parse_image(image, input_layer_name, image_conf): image_conf.channels = image.channels @@ -1072,10 +1081,7 @@ def parse_norm(norm, input_layer_name, norm_conf): norm_conf.scale /= norm.size else: norm_conf.scale /= norm.size ** 2 -''' -caffe_mode: compute the output size using floor instead of ceil, - which is consistent of caffe and CuDNN's convention. -''' + def parse_conv(conv, input_layer_name, conv_conf): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y @@ -1096,14 +1102,9 @@ def parse_conv(conv, input_layer_name, conv_conf): ("Input layer %s: Incorrect input image size %d for input " + "image pixels %d") % (input_layer_name, conv_conf.img_size, img_pixels)) - if conv.caffe_mode: - conv_conf.output_x = \ - 1 + int(math.floor((2 * conv.padding + conv_conf.img_size \ - - conv.filter_size) / float(conv.stride))) - else: - conv_conf.output_x = \ - 1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \ - - conv.filter_size) / float(conv.stride))) + conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, + conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1118,18 +1119,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): if block_expand_conf.img_size_x == 0: block_expand_conf.output_x = 0 else: - block_expand_conf.output_x = \ - 1 + \ - int(math.ceil((2 * block_expand.padding_x + block_expand.img_size_x \ - - block_expand.block_x) / float(block_expand.stride_x))) + block_expand_conf.output_x = cnn_output_size( + block_expand.img_size_x, block_expand.block_x, + block_expand.padding_x, block_expand.stride_x, False) if block_expand_conf.img_size_y == 0: - block_expand_conf.output_y = 0 + block_expand_conf.output_y = 0 else: - block_expand_conf.output_y = \ - 1 + \ - int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \ - - block_expand.block_y) / float(block_expand.stride_y))) + block_expand_conf.output_y = cnn_output_size( + block_expand.img_size_y, block_expand.block_y, + block_expand.padding_y, block_expand.stride_y, False) def parse_maxout(maxout, input_layer_name, maxout_conf): maxout_conf.channels = maxout.channels -- GitLab