Commit fb20187a authored by wangyang59

deconv layer implementation modification following luotao1 comments

Parent 3d72e949
@@ -89,42 +89,41 @@ size_t ConvBaseLayer::calOutputSize() {
   clearAndReserve(&outputW_);
   size_t layerSize = 0;
-  if (!isDeconv_) {
+  auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
     for (size_t i = 0; i < inputLayers_.size(); i++) {
-      imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
-      imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
-      if (imgSizeH_[i] == 0)
-        imgSizeH_[i] = config_.inputs(i).conv_conf().img_size();
-      if (imgSizeW_[i] == 0)
-        imgSizeW_[i] = config_.inputs(i).conv_conf().img_size();
-      outputH_.push_back(
-          outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
-      outputW_.push_back(
-          outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i]));
-      CHECK_EQ(outputH_[i], outputH_[0]);
-      CHECK_EQ(outputW_[i], outputW_[0]);
+      inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
+      inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
+      if (isDeconv_) {
+        if (inH[i] == 0)
+          inH[i] = config_.inputs(i).conv_conf().output_x();
+        if (inW[i] == 0)
+          inW[i] = config_.inputs(i).conv_conf().output_x();
+        outH.push_back(
+            imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
+        outW.push_back(
+            imageSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
+      } else {
+        if (inH[i] == 0)
+          inH[i] = config_.inputs(i).conv_conf().img_size();
+        if (inW[i] == 0)
+          inW[i] = config_.inputs(i).conv_conf().img_size();
+        outH.push_back(
+            outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
+        outW.push_back(
+            outputSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
+        CHECK_EQ(outH[i], outH[0]);
+        CHECK_EQ(outW[i], outW[0]);
+      }
     }
-    getOutput().setFrameHeight(outputH_[0]);
-    getOutput().setFrameWidth(outputW_[0]);
-    layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_);
+    getOutput().setFrameHeight(outH[0]);
+    getOutput().setFrameWidth(outW[0]);
+    layerSize = outH[0] * outW[0] * size_t(numFilters_);
+  };
+  if (isDeconv_) {
+    setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_);
   } else {
-    for (size_t i = 0; i < inputLayers_.size(); i++) {
-      outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
-      outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
-      if (outputH_[i] == 0)
-        outputH_[i] = config_.inputs(i).conv_conf().output_x();
-      if (outputW_[i] == 0)
-        outputW_[i] = config_.inputs(i).conv_conf().output_x();
-      imgSizeH_.push_back(
-          imageSize(outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
-      imgSizeW_.push_back(
-          imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
-      CHECK_EQ(imgSizeH_[i], imgSizeH_[0]);
-      CHECK_EQ(imgSizeW_[i], imgSizeW_[0]);
-    }
-    getOutput().setFrameHeight(imgSizeH_[0]);
-    getOutput().setFrameWidth(imgSizeW_[0]);
-    layerSize = imgSizeH_[0] * imgSizeW_[0] * size_t(numFilters_);
+    setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_);
   }
   return layerSize;
......
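The refactor folds the two sizing branches into one setLayerSize lambda: the convolution path derives the output extent from the image via outputSize(), while the deconvolution path applies the inverse mapping via imageSize(). A minimal standalone sketch of that duality, using illustrative free functions rather than Paddle's actual helpers, for the caffe-mode, square-shape case:

```cpp
#include <cassert>

// Convolution output extent (caffe mode, floor division) -- the relation
// the conv branch relies on via outputSize().
int convOutputSize(int imgSize, int filterSize, int padding, int stride) {
  return (imgSize + 2 * padding - filterSize) / stride + 1;
}

// Transposed-convolution (deconv) output extent -- the inverse relation
// the deconv branch relies on via imageSize().
int deconvImageSize(int outputX, int filterSize, int padding, int stride) {
  return (outputX - 1) * stride + filterSize - 2 * padding;
}

int main() {
  // Round trip: a 5x5 image, 4x4 filter, stride 1, no padding -> 2x2 output,
  // and deconv maps the 2x2 back to 5x5 (the second test case further down).
  assert(convOutputSize(5, 4, 0, 1) == 2);
  assert(deconvImageSize(2, 4, 0, 1) == 5);
  return 0;
}
```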
@@ -78,8 +78,6 @@ protected:
   /// of output size.
   bool caffeMode_;

 public:
   explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {}
......
@@ -31,14 +31,14 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
    * convTrans, and in other functions too.
    * */
   int channel;
-  int nf;
+  int numFilters;
   /* Initialize the projection */
   for (auto &inputConfig : config_.inputs()) {
     const ConvConfig &conf = inputConfig.conv_conf();
-    nf = (!isDeconv_) ? numFilters_ : conf.channels();
-    subM_.push_back(nf / conf.groups());
+    numFilters = isDeconv_ ? conf.channels() : numFilters_;
+    subM_.push_back(numFilters / conf.groups());
     subN_.push_back(conf.output_x() * conf.output_x());
-    channel = (!isDeconv_) ? conf.channels() : numFilters_;
+    channel = isDeconv_ ? numFilters_ : conf.channels();
     subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
                     conf.groups());
     /* Consistent caffe mode for multiple input */
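The renaming of nf to numFilters also makes the GEMM bookkeeping easier to read: per group, a (subM x subK) filter matrix multiplies a (subK x subN) expanded-input matrix to give a (subM x subN) output block, and on the deconv path numFilters_ and conf.channels() swap roles. A small sketch of that arithmetic, assuming subM/subN/subK carry the same meaning as in this file:

```cpp
#include <cstdio>

// GEMM shapes for im2col-based expand convolution (illustrative only):
// filter (subM x subK) * expanded input (subK x subN) = output (subM x subN).
struct GemmDims {
  int subM;  // output channels per group
  int subN;  // number of output spatial positions (output_x^2)
  int subK;  // expanded column height (channels * filter^2 / groups)
};

GemmDims convGemmDims(int numFilters, int channels, int filterSize,
                      int outputX, int groups) {
  return {numFilters / groups, outputX * outputX,
          channels * filterSize * filterSize / groups};
}

int main() {
  // Conv mapping 3 -> 16 channels with a 5x5 filter and 32x32 output.
  // For deconv, numFilters and channels swap, which is exactly what the
  // "isDeconv_ ? ... : ..." selections above encode.
  GemmDims d = convGemmDims(16, 3, 5, 32, 1);
  printf("subM=%d subN=%d subK=%d\n", d.subM, d.subN, d.subK);  // 16 1024 75
  return 0;
}
```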
@@ -99,7 +99,7 @@ void ExpandConvBaseLayer::addUnsharedBias() {
 void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                          int inIdx) {
-  int channel = (!isDeconv_) ? channels_[inIdx] : numFilters_;
+  int channel = isDeconv_ ? numFilters_ : channels_[inIdx];
   resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
   real *imgData = image->getData() + startIdx * image->getWidth();
@@ -122,10 +122,10 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
   expandOneFrame(image, startIdx, inIdx);
-  int nf = (!isDeconv_) ? numFilters_ : channels_[inIdx];
+  int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;
   real *outData =
-      out->getData() + startIdx * subN * nf;
+      out->getData() + startIdx * subN * numFilters;
   real *wgtData = weights_[inIdx]->getW()->getData();
   real *expInData = expandInput_->getData();
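The offset startIdx * subN * numFilters assumes a sample-major layout in which each frame occupies numFilters * subN consecutive reals. A tiny indexing sketch under that assumption, with a plain flat buffer standing in for the Matrix:

```cpp
#include <cstddef>
#include <vector>

// Start of frame `startIdx` in a flat [batch][numFilters][subN] buffer;
// mirrors out->getData() + startIdx * subN * numFilters above.
inline float* frameStart(std::vector<float>& buf, std::size_t startIdx,
                         std::size_t numFilters, std::size_t subN) {
  return buf.data() + startIdx * subN * numFilters;
}
```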
@@ -147,7 +147,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
 void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
                                     int inpIdx) {
-  int channel = (!isDeconv_) ? channels_[inpIdx] : numFilters_;
+  int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];
   int subM = subM_[inpIdx];
   int subN = subN_[inpIdx];
......
@@ -189,58 +189,55 @@ void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride,
 }

 TEST(Layer, convTransLayerFwd2) {
-  size_t imgSize, output_x, stride, padding, filter_size;
   MatrixPtr result;

-  imgSize = 5;
-  output_x = 1;
-  stride = 1;
-  padding = 0;
-  filter_size = 5;
-  result = Matrix::create(1, imgSize * imgSize, false, false);
+  result = Matrix::create(1, 5 * 5, false, false);
   result->zeroMem();
   result->add(1.0);
-  doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+  doOneConvtTest(/* imgSize */ 5,
+                 /* output_x */ 1,
+                 /* stride */ 1,
+                 /* padding */ 0,
+                 /* filter_size */ 5,
+                 result);

-  imgSize = 5;
-  output_x = 2;
-  stride = 1;
-  padding = 0;
-  filter_size = 4;
   float resultData[] = {1, 2, 2, 2, 1,
                         2, 4, 4, 4, 2,
                         2, 4, 4, 4, 2,
                         2, 4, 4, 4, 2,
                         1, 2, 2, 2, 1};
-  result = Matrix::create(resultData, 1, imgSize * imgSize, false, false);
-  doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+  result->setData(resultData);
+  doOneConvtTest(/* imgSize */ 5,
+                 /* output_x */ 2,
+                 /* stride */ 1,
+                 /* padding */ 0,
+                 /* filter_size */ 4,
+                 result);

-  imgSize = 5;
-  output_x = 2;
-  stride = 2;
-  padding = 1;
-  filter_size = 5;
   float resultData2[] = {1, 2, 2, 2, 1,
                          2, 4, 4, 4, 2,
                          2, 4, 4, 4, 2,
                          2, 4, 4, 4, 2,
                          1, 2, 2, 2, 1};
-  result = Matrix::create(resultData2, 1, imgSize * imgSize, false, false);
-  doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+  result->setData(resultData2);
+  doOneConvtTest(/* imgSize */ 5,
+                 /* output_x */ 2,
+                 /* stride */ 2,
+                 /* padding */ 1,
+                 /* filter_size */ 5,
+                 result);

-  imgSize = 5;
-  output_x = 2;
-  stride = 2;
-  padding = 0;
-  filter_size = 3;
   float resultData3[] = {1, 1, 2, 1, 1,
                          1, 1, 2, 1, 1,
                          2, 2, 4, 2, 2,
                          1, 1, 2, 1, 1,
                          1, 1, 2, 1, 1};
-  result = Matrix::create(resultData3, 1, imgSize * imgSize, false, false);
-  doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result);
+  result->setData(resultData3);
+  doOneConvtTest(/* imgSize */ 5,
+                 /* output_x */ 2,
+                 /* stride */ 2,
+                 /* padding */ 0,
+                 /* filter_size */ 3,
+                 result);
 }

 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
......
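The expected matrices in convTransLayerFwd2 are overlap counts: with an all-ones 2x2 input and an all-ones filter, each deconv output pixel equals the number of filter footprints that cover it, which is where the 1/2/4 patterns come from. A brute-force sketch (illustrative, not part of the test suite) that reproduces them:

```cpp
#include <cstdio>

// Print, for each deconv output pixel, how many filter footprints cover it;
// with an all-ones input and filter this equals the deconv forward output.
void printOverlapCounts(int outputX, int stride, int padding, int filterSize) {
  const int imgSize = (outputX - 1) * stride + filterSize - 2 * padding;
  for (int y = 0; y < imgSize; ++y) {
    for (int x = 0; x < imgSize; ++x) {
      int count = 0;
      for (int oy = 0; oy < outputX; ++oy) {
        for (int ox = 0; ox < outputX; ++ox) {
          const int top = oy * stride - padding;
          const int left = ox * stride - padding;
          if (y >= top && y < top + filterSize &&
              x >= left && x < left + filterSize) {
            ++count;
          }
        }
      }
      printf("%d ", count);
    }
    printf("\n");
  }
}

int main() {
  // Reproduces resultData above: output_x 2, stride 1, padding 0,
  // filter_size 4 -> rows "1 2 2 2 1", "2 4 4 4 2", ...
  printOverlapCounts(2, 1, 0, 4);
  return 0;
}
```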
@@ -351,12 +351,10 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, convTransLayer) {
   testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false);
-/*
 #ifndef PADDLE_ONLY_CPU
-  testConvLayer("exconv", trans= false, useGpu= true);
-  testConvLayer("cudnn_conv", trans= false, useGpu= true);
+  testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ true);
+  // testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
 #endif
-*/
 }

 TEST(Layer, blockExpandLayer) {
......
@@ -1082,7 +1082,11 @@ def parse_norm(norm, input_layer_name, norm_conf):
     else:
         norm_conf.scale /= norm.size ** 2

-def parse_conv(conv, input_layer_name, conv_conf):
+'''
+caffe_mode: compute the output size using floor instead of ceil,
+which is consistent with the convention of Caffe and cuDNN.
+'''
+def parse_conv(conv, input_layer_name, conv_conf, num_filters=None, trans=False):
     conv_conf.filter_size = conv.filter_size
     conv_conf.filter_size_y = conv.filter_size_y
     conv_conf.channels = conv.channels
@@ -1093,49 +1097,41 @@ def parse_conv(conv, input_layer_name, conv_conf):
     conv_conf.groups = conv.groups
     conv_conf.filter_channels = conv.channels / conv.groups
     conv_conf.caffe_mode = conv.caffe_mode
-    img_pixels = g_layer_map[input_layer_name].size / conv.channels
-    print('channels=%d size=%d'%(conv.channels,
-                                 g_layer_map[input_layer_name].size))
-    conv_conf.img_size = int(img_pixels ** 0.5)
-    config_assert((conv_conf.img_size ** 2) == img_pixels,
-                  ("Input layer %s: Incorrect input image size %d for input "
-                   + "image pixels %d")
-                  % (input_layer_name, conv_conf.img_size, img_pixels))
-    conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size,
-                                         conv_conf.padding, conv_conf.stride,
-                                         conv_conf.caffe_mode)
-
-def parse_conv_trans(conv, input_layer_name, conv_conf, num_filters):
-    conv_conf.filter_size = conv.filter_size
-    conv_conf.filter_size_y = conv.filter_size_y
-    conv_conf.channels = conv.channels
-    conv_conf.padding = conv.padding
-    conv_conf.padding_y = conv.padding_y
-    conv_conf.stride = conv.stride
-    conv_conf.stride_y = conv.stride_y
-    conv_conf.groups = conv.groups
-    conv_conf.filter_channels = num_filters / conv.groups
-    conv_conf.caffe_mode = conv.caffe_mode
-    outputSize = g_layer_map[input_layer_name].size / conv.channels
-    print('channels=%d size=%d'%(conv.channels,
-                                 g_layer_map[input_layer_name].size))
-    conv_conf.output_x = int(outputSize ** 0.5)
-    config_assert((conv_conf.output_x ** 2) == outputSize,
-                  ("Input layer %s: Incorrect input image size %d for input "
-                   + "image pixels %d")
-                  % (input_layer_name, conv_conf.output_x, outputSize))
-    if conv.caffe_mode:
-        conv_conf.img_size = \
-            (conv_conf.output_x - 1) * conv.stride \
-            + conv.filter_size - 2 * conv.padding
-    else:
-        conv_conf.img_size = \
-            (conv_conf.output_x - 2) * conv.stride \
-            + conv.filter_size - 2 * conv.padding + 1
+    if not trans:
+        img_pixels = g_layer_map[input_layer_name].size / conv.channels
+        print('channels=%d size=%d'%(conv.channels,
+                                     g_layer_map[input_layer_name].size))
+        conv_conf.img_size = int(img_pixels ** 0.5)
+        config_assert((conv_conf.img_size ** 2) == img_pixels,
+                      ("Input layer %s: Incorrect input image size %d for input "
+                       + "image pixels %d")
+                      % (input_layer_name, conv_conf.img_size, img_pixels))
+        if conv.caffe_mode:
+            conv_conf.output_x = \
+                1 + int(math.floor((2 * conv.padding + conv_conf.img_size \
+                                    - conv.filter_size) / float(conv.stride)))
+        else:
+            conv_conf.output_x = \
+                1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
+                                   - conv.filter_size) / float(conv.stride)))
+    else:
+        outputSize = g_layer_map[input_layer_name].size / conv.channels
+        print('channels=%d size=%d'%(conv.channels,
+                                     g_layer_map[input_layer_name].size))
+        conv_conf.output_x = int(outputSize ** 0.5)
+        config_assert((conv_conf.output_x ** 2) == outputSize,
+                      ("Input layer %s: Incorrect input image size %d for input "
+                       + "image pixels %d")
+                      % (input_layer_name, conv_conf.output_x, outputSize))
+        if conv.caffe_mode:
+            conv_conf.img_size = \
+                (conv_conf.output_x - 1) * conv.stride \
+                + conv.filter_size - 2 * conv.padding
+        else:
+            conv_conf.img_size = \
+                (conv_conf.output_x - 2) * conv.stride \
+                + conv.filter_size - 2 * conv.padding + 1

 def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
     block_expand_conf.channels = block_expand.channels
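The trans branch inverts the convolution size formula per rounding mode: caffe mode computes output_x = 1 + floor((img_size + 2 * padding - filter_size) / stride), whose inverse is img_size = (output_x - 1) * stride + filter_size - 2 * padding; ceil mode instead recovers the smallest consistent size, (output_x - 2) * stride + filter_size - 2 * padding + 1. A round-trip check of both modes, with illustrative helpers rather than the config parser's own code:

```cpp
#include <cassert>
#include <cmath>

// Conv output extent under both rounding conventions (mirrors the Python
// branches above; names are illustrative).
int convOut(int imgSize, int filter, int pad, int stride, bool caffeMode) {
  const double q = double(imgSize + 2 * pad - filter) / stride;
  return 1 + int(caffeMode ? std::floor(q) : std::ceil(q));
}

// Deconv img_size recovered from output_x: the inverse used on the trans path.
int deconvImg(int outputX, int filter, int pad, int stride, bool caffeMode) {
  return caffeMode ? (outputX - 1) * stride + filter - 2 * pad
                   : (outputX - 2) * stride + filter - 2 * pad + 1;
}

int main() {
  // convOut(deconvImg(x)) == x in both modes, over a spread of shapes.
  for (int mode = 0; mode <= 1; ++mode) {
    for (int outputX = 2; outputX <= 6; ++outputX) {
      for (int stride = 1; stride <= 3; ++stride) {
        assert(convOut(deconvImg(outputX, 5, 1, stride, mode != 0),
                       5, 1, stride, mode != 0) == outputX);
      }
    }
  }
  return 0;
}
```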
@@ -1685,10 +1681,11 @@ class ConvTransLayerBase(LayerBase):
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
-            parse_conv_trans(
+            parse_conv(
                 self.inputs[input_index].conv,
                 input_layer.name,
-                self.config.inputs[input_index].conv_conf, num_filters)
+                self.config.inputs[input_index].conv_conf, num_filters,
+                trans=True)
             conv_conf = self.config.inputs[input_index].conv_conf
             psize = self.calc_parameter_size(conv_conf)
             print("output size for %s is %d " % (name, conv_conf.output_x))
......
@@ -36,7 +36,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            "pooling_layer", "lstmemory", "last_seq", "first_seq",
            "cos_sim", "hsigmoid", "conv_projection",
            "regression_cost", 'classification_cost', "LayerOutput",
-           'img_conv_layer', 'img_convTrans_layer', 'img_pool_layer', 'batch_norm_layer',
+           'img_conv_layer', 'img_pool_layer', 'batch_norm_layer',
            'img_cmrnorm_layer', 'addto_layer',
            'concat_layer', 'lstm_step_layer', 'recurrent_group',
            'memory', 'StaticInput', 'expand_layer', 'scaling_layer',
......