提交 e802471c 编写于 作者: L luotao1 提交者: GitHub

abstract outputSize function in CNN-related layers (#314)

上级 f9849ac9
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "ConvBaseLayer.h"
namespace paddle {
......@@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() {
imgSizeH_[i] = config_.inputs(i).conv_conf().img_size();
if (imgSizeW_[i] == 0)
imgSizeW_[i] = config_.inputs(i).conv_conf().img_size();
outputH_.push_back(
outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
outputW_.push_back(
outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i]));
outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i],
strideY_[i], caffeMode_));
outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i],
stride_[i], caffeMode_));
CHECK_EQ(outputH_[i], outputH_[0]);
CHECK_EQ(outputW_[i], outputW_[0]);
}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
/**
......@@ -87,31 +88,6 @@ public:
virtual size_t calOutputSize();
Weight& getWeight(int idx) { return *weights_[idx]; }
/**
 * Compute the output extent along one spatial dimension. Which formula is
 * used depends on the member flag caffeMode_.
 *
 * Worked example with input (+padding) positions 0123456789, i.e.
 * imageSize (+padding) = 10, filterSize = 3, stride = 2:
 *   - caffeMode_ == true : windows (012), (234), (456), (678)       -> 4
 *   - caffeMode_ == false: windows (012), (234), (456), (678), (9)  -> 5
 *
 * @param imageSize   input extent along this dimension.
 * @param filterSize  filter extent along this dimension.
 * @param padding     padding value; it is counted twice in the formula.
 * @param stride      step between consecutive windows.
 * @return the computed output extent; CHECK_GE enforces that it is >= 1.
 */
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int extent = imageSize - filterSize + 2 * padding;
  // When caffeMode_ is off, (stride - 1) is added before the integer
  // division so that a trailing partial window is counted as well.
  const int outputSize =
      (caffeMode_ ? extent : extent + stride - 1) / stride + 1;
  CHECK_GE(outputSize, 1);
  return outputSize;
}
};
} // namespace paddle
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/math/Matrix.h"
#include "paddle/math/MathUtils.h"
#include "Operator.h"
namespace paddle {
......@@ -35,8 +35,8 @@ public:
*/
virtual ~ConvOperator() {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0;
hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0;
}
hl_destroy_tensor_descriptor(inputDesc_);
......@@ -83,33 +83,6 @@ private:
filterSize_ * filterSizeY_ * channels_ * numFilters_);
}
/**
 * Calculate the output size along one dimension.
 *
 * Worked example with input (+padding) positions 0123456789, i.e.
 * imageSize (+padding) = 10, filterSize = 3, stride = 2:
 *   - caffeMode_ == true : windows (012), (234), (456), (678)       -> 4
 *   - caffeMode_ == false: windows (012), (234), (456), (678), (9)  -> 5
 *
 * @param imageSize   input extent along this dimension.
 * @param filterSize  filter extent along this dimension.
 * @param padding     padding value; it is counted twice in the formula.
 * @param stride      step between consecutive windows.
 * @return the computed output extent (no range check is performed here).
 */
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int extent = imageSize - filterSize + 2 * padding;
  if (caffeMode_) {
    // Only windows that fit completely are counted.
    return extent / stride + 1;
  }
  // Adding (stride - 1) before dividing also counts a trailing partial
  // window.
  return (extent + stride - 1) / stride + 1;
}
/// Most of member variables are same with CudnnConvLayer.
/// There is no explanation here.
int imageH_, imageW_, outputH_, outputW_;
......@@ -129,7 +102,7 @@ private:
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
size_t workSpaceInBytes_;
void* workSpace_;
void *workSpace_;
bool isSelectAlgo_;
};
......@@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu)
void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
if (maxWorkSpace > workSpaceInBytes_) {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
hl_free_mem_device(workSpace_);
}
// total amount of storage needed
workSpace_ = hl_malloc_device(maxWorkSpace);
......@@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
}
}
void ConvOperator::reshape(int batchSize) {
imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth();
if (imageH_ == 0) imageH_ = imgSize_;
if (imageW_ == 0) imageW_ = imgSize_;
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_);
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
out_->setFrameHeight(outputH_);
out_->setFrameWidth(outputW_);
......@@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) {
reshapeImageDescriptors();
if (!isSelectAlgo_) {
hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_,
convDesc_, &fwdAlgo_, &fwdLimitBytes_,
&bwdDataAlgo_, &bwdDataLimitBytes_,
&bwdFilterAlgo_, &bwdFilterLimitBytes_);
hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, convDesc_,
&fwdAlgo_, &fwdLimitBytes_, &bwdDataAlgo_,
&bwdDataLimitBytes_, &bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
......@@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() {
hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_,
filterSizeY_, filterSize_);
hl_create_tensor_descriptor(&inputDesc_);
int outputX = outputSize(imgSize_, filterSize_, padding_, stride_);
int outputX =
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
CHECK_EQ(outputX, outputX_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_,
......@@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() {
void ConvOperator::reshapeImageDescriptors() {
hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_,
channels_ * imageH_ * imageW_, imageH_ * imageW_,
imageW_, 1);
channels_ * imageH_ * imageW_, imageH_ * imageW_, imageW_,
1);
hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_,
numFilters_ * outputH_ * outputW_, outputH_ * outputW_,
outputW_, 1);
hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_,
paddingY_, padding_, strideY_, stride_);
hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, paddingY_,
padding_, strideY_, stride_);
inputOffset_ = channels_ * imageH_ * imageW_;
outputOffset_ = numFilters_ * outputH_ * outputW_;
weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_;
......@@ -273,18 +246,17 @@ void ConvOperator::backward() {
real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_,
outGrad, filterDesc_, weightGrad,
convDesc_, workSpace_,
workSpaceInBytes_, bwdFilterAlgo_);
convDesc_, workSpace_, workSpaceInBytes_,
bwdFilterAlgo_);
}
MatrixPtr preGrad = ins_[0]->grad;
if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
hl_convolution_backward_data(inputDesc_, inputGrad, outputDesc_,
outGrad, filterDesc_, wgtData,
convDesc_, workSpace_,
workSpaceInBytes_, bwdDataAlgo_);
hl_convolution_backward_data(
inputDesc_, inputGrad, outputDesc_, outGrad, filterDesc_, wgtData,
convDesc_, workSpace_, workSpaceInBytes_, bwdDataAlgo_);
}
}
}
......
......@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
......@@ -42,17 +42,15 @@ protected:
void reshapeTensorDesc(int batchSize);
void reshape(int batchSize);
/**
 * Output extent along one dimension: integer-divide the padded input
 * extent minus the filter extent by the stride, then add one.
 *
 * @param imageSize   input extent along this dimension.
 * @param filterSize  filter extent along this dimension.
 * @param padding     padding value; it is counted twice in the formula.
 * @param stride      step between consecutive windows.
 * @return the computed output extent.
 */
int outputSize(int imageSize, int filterSize, int padding, int stride) {
  const int extent = imageSize - filterSize + 2 * padding;
  const int steps = extent / stride;
  return steps + 1;
}
size_t calOutputSize() {
imageH_ = in_->getFrameHeight();
imageW_ = in_->getFrameWidth();
if (imageH_ == 0) imageH_ = configImgH_;
if (imageW_ == 0) imageW_ = configImgW_;
outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_);
outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_);
outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_,
/* caffeMode */ true);
outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_,
/* caffeMode */ true);
const_cast<Argument*>(out_)->setFrameHeight(outputH_);
const_cast<Argument*>(out_)->setFrameWidth(outputW_);
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/math/Matrix.h"
......@@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
strideHeight = strideY_;
strideWidth = stride_;
hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight,
windowWidth, heightPadding, widthPadding,
strideHeight, strideWidth);
hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, windowWidth,
heightPadding, widthPadding, strideHeight,
strideWidth);
return true;
}
......@@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) {
}
CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(),
channels_ * imageH_ * imageW_);
outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imageW_, sizeX_, confPadding_, stride_);
outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_,
/* caffeMode */ false);
outputW_ =
outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ false);
getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_);
......@@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) {
real *inputData = getInputValue(0)->getData();
real *outData = getOutputValue()->getData();
hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData,
poolingDesc_);
hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_);
}
void CudnnPoolLayer::backward(const UpdateCallback &callback) {
......@@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) {
real *inputGrad = getInputGrad(0)->getData();
real *outData = getOutputValue()->getData();
real *outGrad = getOutputGrad()->getData();
hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_,
outData, outGrad, poolingDesc_);
hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, outData,
outGrad, poolingDesc_);
}
CudnnPoolLayer::~CudnnPoolLayer() {
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/MathUtils.h"
#include <vector>
namespace paddle {
......@@ -47,16 +48,6 @@ public:
static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
 * Calculate the pooled output size from the window size, padding and
 * stride. (stride - 1) is added before the integer division, so a
 * trailing partial window contributes one extra output position.
 *
 * @param imageSize   input extent along this dimension.
 * @param windowSize  pooling window extent along this dimension.
 * @param padding     padding value; it is counted twice in the formula.
 * @param stride      step between consecutive windows.
 * @return the computed output extent.
 */
int outputSize(int imageSize, int windowSize, int padding, int stride) {
  const int numerator = imageSize - windowSize + 2 * padding + stride - 1;
  return numerator / stride + 1;
}
};
} // namespace paddle
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "PoolProjectionLayer.h"
......@@ -31,8 +30,10 @@ size_t PoolProjectionLayer::getSize() {
imgSizeW_ = imgSize_;
}
outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_,
/* caffeMode */ false);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_,
/* caffeMode */ false);
layerSize = outputH_ * outputW_ * channels_;
......@@ -53,9 +54,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
}
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
......@@ -72,9 +73,8 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
sizeX_, sizeY_, strideY_, stride_, outputH_,
outputW_, 1, 1, confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::forward(PassType passType) {
......@@ -89,9 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
}
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
......@@ -103,9 +103,8 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */
MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, 1, 1,
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/gserver/layers/DataLayer.h"
#include "ModelConfig.pb.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/math/MathUtils.h"
#include "TestUtil.h"
#include "LayerGradUtil.h"
......@@ -134,7 +135,6 @@ TEST(Projection, identity) {
}
}
#ifndef PADDLE_ONLY_CPU
TEST(Projection, conv) {
const int NUM_FILTERS = 16;
......@@ -158,21 +158,23 @@ TEST(Projection, conv) {
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE);
int outputSize = (2 * conv->padding() + conv->img_size() -
conv->filter_size()) / conv->stride() + 1;
int outputSizeY = (2 * conv->padding_y() + conv->img_size() -
conv->filter_size_y()) / conv->stride_y() + 1;
conv->set_output_x(outputSize);
int output_x =
outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
conv->stride(), /* caffeMode */ true);
int output_y =
outputSize(conv->img_size(), conv->filter_size_y(), conv->padding_y(),
conv->stride_y(), /* caffeMode */ true);
conv->set_output_x(output_x);
conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS);
conf.set_output_size(outputSize * outputSizeY * NUM_FILTERS);
conf.set_output_size(output_x * output_y * NUM_FILTERS);
testProjectionGrad(conf, INPUT_DATA,
testProjectionGrad(
conf, INPUT_DATA,
/* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y,
/* batchSize */ 100, true, false, NUM_FILTERS, true);
}
#endif
TEST(Layer, concat) {
TestConfig config;
config.biasSize = 0;
......@@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16);
conv->set_output_x(
(2 * conv->padding() + conv->img_size() - conv->filter_size()) /
((float)conv->stride()) +
1.5);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_x() *
config.layerConfig.num_filters());
......@@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) {
blockExpand->set_stride_x(2);
blockExpand->set_stride_y(2);
blockExpand->set_output_x(
1 +
(2 * blockExpand->padding_x() + blockExpand->img_size_x() -
blockExpand->block_x() + blockExpand->stride_x() - 1) /
blockExpand->stride_x());
outputSize(blockExpand->img_size_x(), blockExpand->block_x(),
blockExpand->padding_x(), blockExpand->stride_x(),
/* caffeMode */ false));
blockExpand->set_output_y(
1 +
(2 * blockExpand->padding_y() + blockExpand->img_size_y() -
blockExpand->block_y() + blockExpand->stride_y() - 1) /
blockExpand->stride_y());
outputSize(blockExpand->img_size_y(), blockExpand->block_y(),
blockExpand->padding_y(), blockExpand->stride_y(),
/* caffeMode */ false));
config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() *
blockExpand->channels());
......@@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
......@@ -1255,12 +1254,11 @@ TEST(Operator, conv) {
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE);
int outputSize =
int(1.0 * (2 * conv->padding() + conv->img_size() - conv->filter_size()) /
conv->stride()) +
1;
conv->set_output_x(outputSize);
config.layerConfig.set_size(outputSize * outputSize *
int output_x =
outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
conv->stride(), /* caffeMode */ true);
conv->set_output_x(output_x);
config.layerConfig.set_size(output_x * output_x *
config.layerConfig.num_filters());
config.layerConfig.set_size(conv->output_x() * conv->output_x() *
NUM_FILTERS);
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MathUtils.h"
#include <algorithm>
#include "paddle/utils/Logging.h"
......@@ -24,11 +23,7 @@ namespace paddle {
* major is rows and minor is cols, according to
* major value to initialize minor value"
*/
void sparseRand(int* major,
int* minor,
int nnz,
int majorLen,
int minorMax,
void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
bool useGpu) {
CHECK(size_t(nnz) > size_t(1));
int* cpuMajor;
......@@ -72,5 +67,17 @@ void sparseRand(int* major,
}
}
/**
 * Compute the output extent along one spatial dimension for
 * convolution/pooling style windows.
 *
 * With caffeMode set the result is
 *   (imageSize - filterSize + 2 * padding) / stride + 1;
 * otherwise (stride - 1) is added to the numerator before the integer
 * division, which also counts a trailing partial window.
 *
 * The result is verified with CHECK_GE to be at least 1.
 */
int outputSize(int imageSize, int filterSize, int padding, int stride,
               bool caffeMode) {
  const int extent = imageSize - filterSize + 2 * padding;
  const int outputSize =
      (caffeMode ? extent : extent + stride - 1) / stride + 1;
  CHECK_GE(outputSize, 1);
  return outputSize;
}
} // namespace paddle
......@@ -44,4 +44,20 @@ namespace paddle {
void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax,
bool useGpu);
/**
 * Calculate the output size along one dimension based on caffeMode.
 *
 * Example:
 *  - input(+padding): 0123456789
 *  - imageSize(+padding) = 10;
 *  - filterSize = 3;
 *  - stride = 2;
 *  - caffeMode is true:
 *    - output: (012), (234), (456), (678)
 *    - outputSize = 4;
 *  - caffeMode is false:
 *    - output: (012), (234), (456), (678), (9)
 *    - outputSize = 5;
 *
 * @param imageSize   input extent along this dimension.
 * @param filterSize  filter (or pooling window) extent along this dimension.
 * @param padding     padding value; it is counted twice in the formula.
 * @param stride      step between consecutive windows.
 * @param caffeMode   selects which of the two formulas above is used.
 * @return the computed output size; the implementation checks that it
 *         is at least 1.
 */
int outputSize(int imageSize, int filterSize, int padding, int stride,
bool caffeMode);
} // namespace paddle
......@@ -1006,6 +1006,17 @@ def TestData(data_config, async_load_data=None):
" Data definition")
g_config.test_data_config.async_load_data = async_load_data
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent with caffe and CuDNN's convention.
'''
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
    """Return the output size along one dimension of a CNN-style window op.

    The quotient (2 * padding + img_size - filter_size) / stride is
    computed in floating point, rounded down when caffe_mode is true and
    rounded up otherwise, and then incremented by one.
    """
    rounding = math.floor if caffe_mode else math.ceil
    span = 2 * padding + img_size - filter_size
    return 1 + int(rounding(span / float(stride)))
def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in ['max-projection', 'avg-projection',
......@@ -1036,12 +1047,10 @@ def parse_pool(pool, input_layer_name, pool_conf):
if pool.padding is not None:
pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
2*pool_conf.padding - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
pool_conf.padding, pool_conf.stride, False)
pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
pool_conf.padding_y, pool_conf.stride_y, False)
def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels
......@@ -1072,10 +1081,7 @@ def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.scale /= norm.size
else:
norm_conf.scale /= norm.size ** 2
'''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent of caffe and CuDNN's convention.
'''
def parse_conv(conv, input_layer_name, conv_conf):
conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y
......@@ -1096,14 +1102,9 @@ def parse_conv(conv, input_layer_name, conv_conf):
("Input layer %s: Incorrect input image size %d for input "
+ "image pixels %d")
% (input_layer_name, conv_conf.img_size, img_pixels))
if conv.caffe_mode:
conv_conf.output_x = \
1 + int(math.floor((2 * conv.padding + conv_conf.img_size \
- conv.filter_size) / float(conv.stride)))
else:
conv_conf.output_x = \
1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
- conv.filter_size) / float(conv.stride)))
conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size,
conv_conf.padding, conv_conf.stride,
conv_conf.caffe_mode)
def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels
......@@ -1118,18 +1119,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
if block_expand_conf.img_size_x == 0:
block_expand_conf.output_x = 0
else:
block_expand_conf.output_x = \
1 + \
int(math.ceil((2 * block_expand.padding_x + block_expand.img_size_x \
- block_expand.block_x) / float(block_expand.stride_x)))
block_expand_conf.output_x = cnn_output_size(
block_expand.img_size_x, block_expand.block_x,
block_expand.padding_x, block_expand.stride_x, False)
if block_expand_conf.img_size_y == 0:
block_expand_conf.output_y = 0
block_expand_conf.output_y = 0
else:
block_expand_conf.output_y = \
1 + \
int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \
- block_expand.block_y) / float(block_expand.stride_y)))
block_expand_conf.output_y = cnn_output_size(
block_expand.img_size_y, block_expand.block_y,
block_expand.padding_y, block_expand.stride_y, False)
def parse_maxout(maxout, input_layer_name, maxout_conf):
maxout_conf.channels = maxout.channels
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册