diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index e88e962cff5bbfcb8be1014dbaab85568d2625ff..ab416ff2e3c18b445c382ef8addebd8122f3324c 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -874,7 +874,7 @@ class Conv(Cfg): filter_size_y=None, padding_y=None, stride_y=None, - dilation=None, + dilation=1, dilation_y=None): self.add_keys(locals()) if filter_size_y is None: @@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None): #caffe_mode: compute the output size using floor instead of ceil, # which is consistent of caffe and CuDNN's convention. -def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): - output = (2 * padding + img_size - filter_size) / float(stride) +def cnn_output_size(img_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + output = (2 * padding + img_size - filter_s) / float(stride) if caffe_mode: return 1 + int(math.floor(output)) else: @@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): #calcualte image_size based on output_size for de-convolution (ConvTransLayer). #It is the reverse function of cnn_output_size -def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): - img_size = (output_size - 1) * stride + filter_size - 2 * padding +def cnn_image_size(output_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + img_size = (output_size - 1) * stride + filter_s - 2 * padding if not caffe_mode: img_size = img_size + 1 return img_size @@ -1376,6 +1388,8 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.stride_y = conv.stride_y conv_conf.groups = conv.groups conv_conf.caffe_mode = conv.caffe_mode + conv_conf.dilation = conv.dilation + conv_conf.dilation_y = conv.dilation_y if not trans: conv_conf.filter_channels = conv.channels / conv.groups @@ -1383,20 +1397,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): get_img_size(input_layer_name, conv.channels) conv_conf.output_x = cnn_output_size( conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv_conf.dilation) conv_conf.output_y = cnn_output_size( conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv_conf.dilation_y) else: conv_conf.filter_channels = num_filters / conv.groups conv_conf.output_x, conv_conf.output_y = \ get_img_size(input_layer_name, conv.channels) conv_conf.img_size = cnn_image_size( conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv_conf.dilation) conv_conf.img_size_y = cnn_image_size( conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv_conf.dilation_y) #caffe_mode: compute the output size using floor instead of ceil,