From fb20187aaa30b7c049c1dee2ad62bde7005b5af8 Mon Sep 17 00:00:00 2001 From: wangyang59 Date: Thu, 3 Nov 2016 11:25:34 -0700 Subject: [PATCH] deconv layer implementation modification following luotao1 comments --- paddle/gserver/layers/ConvBaseLayer.cpp | 65 +++++++------- paddle/gserver/layers/ConvBaseLayer.h | 2 - paddle/gserver/layers/ExpandConvBaseLayer.cpp | 16 ++-- paddle/gserver/tests/test_ConvTrans.cpp | 63 +++++++------- paddle/gserver/tests/test_LayerGrad.cpp | 6 +- python/paddle/trainer/config_parser.py | 87 +++++++++---------- .../paddle/trainer_config_helpers/layers.py | 2 +- 7 files changed, 115 insertions(+), 126 deletions(-) diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 5bc22f47793..733065a7537 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -89,42 +89,41 @@ size_t ConvBaseLayer::calOutputSize() { clearAndReserve(&outputW_); size_t layerSize = 0; - if (!isDeconv_) { + auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { for (size_t i = 0; i < inputLayers_.size(); i++) { - imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - if (imgSizeH_[i] == 0) - imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); - if (imgSizeW_[i] == 0) - imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); - outputH_.push_back( - outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); - outputW_.push_back( - outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i])); - CHECK_EQ(outputH_[i], outputH_[0]); - CHECK_EQ(outputW_[i], outputW_[0]); + inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + if (isDeconv_) { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().output_x(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().output_x(); + 
outH.push_back( + imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i])); + outW.push_back( + imageSize(inW[i], filterSize_[i], padding_[i], stride_[i])); + } else { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().img_size(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().img_size(); + outH.push_back( + outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i])); + outW.push_back( + outputSize(inW[i], filterSize_[i], padding_[i], stride_[i])); + CHECK_EQ(outH[i], outH[0]); + CHECK_EQ(outW[i], outW[0]); + } } - getOutput().setFrameHeight(outputH_[0]); - getOutput().setFrameWidth(outputW_[0]); - layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_); + getOutput().setFrameHeight(outH[0]); + getOutput().setFrameWidth(outW[0]); + layerSize = outH[0] * outW[0] * size_t(numFilters_); + }; + + if (isDeconv_) { + setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_); } else { - for (size_t i = 0; i < inputLayers_.size(); i++) { - outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - if (outputH_[i] == 0) - outputH_[i] = config_.inputs(i).conv_conf().output_x(); - if (outputW_[i] == 0) - outputW_[i] = config_.inputs(i).conv_conf().output_x(); - imgSizeH_.push_back( - imageSize(outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); - imgSizeW_.push_back( - imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i])); - CHECK_EQ(imgSizeH_[i], imgSizeH_[0]); - CHECK_EQ(imgSizeW_[i], imgSizeW_[0]); - } - getOutput().setFrameHeight(imgSizeH_[0]); - getOutput().setFrameWidth(imgSizeW_[0]); - layerSize = imgSizeH_[0] * imgSizeW_[0] * size_t(numFilters_); + setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_); } return layerSize; diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 2f2ce59ad9e..4d5b2b8d05a 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ 
-78,8 +78,6 @@ protected: /// of output size. bool caffeMode_; - - public: explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp index 75ac8245d88..0bab0ca764f 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -31,14 +31,14 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, * convTrans, and in other functions too. * */ int channel; - int nf; + int numFilters; /* Initialize the projection */ for (auto &inputConfig : config_.inputs()) { const ConvConfig &conf = inputConfig.conv_conf(); - nf = (!isDeconv_) ? numFilters_ : conf.channels(); - subM_.push_back(nf / conf.groups()); + numFilters = isDeconv_ ? conf.channels() : numFilters_; + subM_.push_back(numFilters / conf.groups()); subN_.push_back(conf.output_x() * conf.output_x()); - channel = (!isDeconv_) ? conf.channels() : numFilters_; + channel = isDeconv_ ? numFilters_ : conf.channels(); subK_.push_back(channel * conf.filter_size() * conf.filter_size() / conf.groups()); /* Consistent caffe mode for multiple input */ @@ -99,7 +99,7 @@ void ExpandConvBaseLayer::addUnsharedBias() { void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx) { - int channel = (!isDeconv_) ? channels_[inIdx] : numFilters_; + int channel = isDeconv_ ? numFilters_ : channels_[inIdx]; resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); real *imgData = image->getData() + startIdx * image->getWidth(); @@ -122,10 +122,10 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, expandOneFrame(image, startIdx, inIdx); - int nf = (!isDeconv_) ? numFilters_ : channels_[inIdx]; + int numFilters = isDeconv_ ? 
channels_[inIdx] : numFilters_; real *outData = - out->getData() + startIdx * subN * nf; + out->getData() + startIdx * subN * numFilters; real *wgtData = weights_[inIdx]->getW()->getData(); real *expInData = expandInput_->getData(); @@ -147,7 +147,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image, int inpIdx) { - int channel = (!isDeconv_) ? channels_[inpIdx] : numFilters_; + int channel = isDeconv_ ? numFilters_ : channels_[inpIdx]; int subM = subM_[inpIdx]; int subN = subN_[inpIdx]; diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/gserver/tests/test_ConvTrans.cpp index 756faf26516..9246484ba22 100644 --- a/paddle/gserver/tests/test_ConvTrans.cpp +++ b/paddle/gserver/tests/test_ConvTrans.cpp @@ -189,58 +189,55 @@ void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride, } TEST(Layer, convTransLayerFwd2) { - size_t imgSize, output_x, stride, padding, filter_size; MatrixPtr result; - - imgSize = 5; - output_x = 1; - stride = 1; - padding = 0; - filter_size = 5; - result = Matrix::create(1, imgSize * imgSize, false, false); + result = Matrix::create(1, 5 * 5, false, false); result->zeroMem(); result->add(1.0); - doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 1, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 5, + result); - imgSize = 5; - output_x = 2; - stride = 1; - padding = 0; - filter_size = 4; float resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1}; - result = Matrix::create(resultData, 1, imgSize * imgSize, false, false); - doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); - - imgSize = 5; - output_x = 2; - stride = 2; - padding = 1; - filter_size = 5; + result->setData(resultData); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 4, + 
result); + float resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1}; - result = Matrix::create(resultData2, 1, imgSize * imgSize, false, false); - doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); - - imgSize = 5; - output_x = 2; - stride = 2; - padding = 0; - filter_size = 3; + result->setData(resultData2); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 1, + /* filter_size */ 5, + result); + float resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1}; - result = Matrix::create(resultData3, 1, imgSize * imgSize, false, false); - doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); -} + result->setData(resultData3); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 0, + /* filter_size */ 3, + result);} int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 42c7b139062..9e2e5ebaac2 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -351,12 +351,10 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) { TEST(Layer, convTransLayer) { testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false); -/* #ifndef PADDLE_ONLY_CPU - testConvLayer("exconv", trans= false, useGpu= true); - testConvLayer("cudnn_conv", trans= false, useGpu= true); + testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ true); + // testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); #endif -*/ } TEST(Layer, blockExpandLayer) { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b3d17a47a96..3aa5576c3cf 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1082,7 +1082,11 @@ def 
parse_norm(norm, input_layer_name, norm_conf): else: norm_conf.scale /= norm.size ** 2 -def parse_conv(conv, input_layer_name, conv_conf): +''' +caffe_mode: compute the output size using floor instead of ceil, + which is consistent with caffe and CuDNN's convention. +''' +def parse_conv(conv, input_layer_name, conv_conf, trans=False): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y conv_conf.channels = conv.channels @@ -1093,49 +1097,41 @@ def parse_conv(conv, input_layer_name, conv_conf): conv_conf.groups = conv.groups conv_conf.filter_channels = conv.channels / conv.groups conv_conf.caffe_mode = conv.caffe_mode - - img_pixels = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d'%(conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.img_size = int(img_pixels ** 0.5) - config_assert((conv_conf.img_size ** 2) == img_pixels, - ("Input layer %s: Incorrect input image size %d for input " - + "image pixels %d") - % (input_layer_name, conv_conf.img_size, img_pixels)) - conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, - conv_conf.padding, conv_conf.stride, - conv_conf.caffe_mode) - - -def parse_conv_trans(conv, input_layer_name, conv_conf, num_filters): - conv_conf.filter_size = conv.filter_size - conv_conf.filter_size_y = conv.filter_size_y - conv_conf.channels = conv.channels - conv_conf.padding = conv.padding - conv_conf.padding_y = conv.padding_y - conv_conf.stride = conv.stride - conv_conf.stride_y = conv.stride_y - conv_conf.groups = conv.groups - conv_conf.filter_channels = num_filters / conv.groups - conv_conf.caffe_mode = conv.caffe_mode - - outputSize = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d'%(conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.output_x = int(outputSize ** 0.5) - config_assert((conv_conf.output_x ** 2) == outputSize, - ("Input layer %s: Incorrect input image size %d for input " - +
"image pixels %d") - % (input_layer_name, conv_conf.output_x, outputSize)) - if conv.caffe_mode: - conv_conf.img_size = \ - (conv_conf.output_x - 1) * conv.stride \ - + conv.filter_size - 2 * conv.padding + + if not trans: + img_pixels = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.img_size = int(img_pixels ** 0.5) + config_assert((conv_conf.img_size ** 2) == img_pixels, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.img_size, img_pixels)) + if conv.caffe_mode: + conv_conf.output_x = \ + 1 + int(math.floor((2 * conv.padding + conv_conf.img_size \ + - conv.filter_size) / float(conv.stride))) + else: + conv_conf.output_x = \ + 1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \ + - conv.filter_size) / float(conv.stride))) else: - conv_conf.img_size = \ - (conv_conf.output_x - 2) * conv.stride \ - + conv.filter_size - 2 * conv.padding + 1 - + outputSize = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.output_x = int(outputSize ** 0.5) + config_assert((conv_conf.output_x ** 2) == outputSize, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.output_x, outputSize)) + if conv.caffe_mode: + conv_conf.img_size = \ + (conv_conf.output_x - 1) * conv.stride \ + + conv.filter_size - 2 * conv.padding + else: + conv_conf.img_size = \ + (conv_conf.output_x - 2) * conv.stride \ + + conv.filter_size - 2 * conv.padding + 1 def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1685,10 +1681,11 @@ class ConvTransLayerBase(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) - parse_conv_trans( + parse_conv( 
self.inputs[input_index].conv, input_layer.name, - self.config.inputs[input_index].conv_conf, num_filters) + self.config.inputs[input_index].conv_conf, num_filters, + trans=True) conv_conf = self.config.inputs[input_index].conv_conf psize = self.calc_parameter_size(conv_conf) print("output size for %s is %d " % (name, conv_conf.output_x)) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 711c9ca993a..1fdb1a849e8 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -36,7 +36,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", "pooling_layer", "lstmemory", "last_seq", "first_seq", "cos_sim", "hsigmoid", "conv_projection", "regression_cost", 'classification_cost', "LayerOutput", - 'img_conv_layer', 'img_convTrans_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', 'expand_layer', 'scaling_layer', -- GitLab