提交 fb20187a 编写于 作者: W wangyang59

Modify the deconv layer implementation following luotao1's comments

上级 3d72e949
...@@ -89,42 +89,41 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -89,42 +89,41 @@ size_t ConvBaseLayer::calOutputSize() {
clearAndReserve(&outputW_); clearAndReserve(&outputW_);
size_t layerSize = 0; size_t layerSize = 0;
if (!isDeconv_) { auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
if (imgSizeH_[i] == 0) if (isDeconv_) {
imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); if (inH[i] == 0)
if (imgSizeW_[i] == 0) inH[i] = config_.inputs(i).conv_conf().output_x();
imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); if (inW[i] == 0)
outputH_.push_back( inW[i] = config_.inputs(i).conv_conf().output_x();
outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); outH.push_back(
outputW_.push_back( imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i])); outW.push_back(
CHECK_EQ(outputH_[i], outputH_[0]); imageSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
CHECK_EQ(outputW_[i], outputW_[0]); } else {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().img_size();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().img_size();
outH.push_back(
outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
outW.push_back(
outputSize(inW[i], filterSize_[i], padding_[i], stride_[i]));
CHECK_EQ(outH[i], outH[0]);
CHECK_EQ(outW[i], outW[0]);
}
} }
getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameHeight(outH[0]);
getOutput().setFrameWidth(outputW_[0]); getOutput().setFrameWidth(outW[0]);
layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_); layerSize = outH[0] * outW[0] * size_t(numFilters_);
};
if (isDeconv_) {
setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_);
} else { } else {
for (size_t i = 0; i < inputLayers_.size(); i++) { setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_);
outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
if (outputH_[i] == 0)
outputH_[i] = config_.inputs(i).conv_conf().output_x();
if (outputW_[i] == 0)
outputW_[i] = config_.inputs(i).conv_conf().output_x();
imgSizeH_.push_back(
imageSize(outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i]));
imgSizeW_.push_back(
imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
CHECK_EQ(imgSizeH_[i], imgSizeH_[0]);
CHECK_EQ(imgSizeW_[i], imgSizeW_[0]);
}
getOutput().setFrameHeight(imgSizeH_[0]);
getOutput().setFrameWidth(imgSizeW_[0]);
layerSize = imgSizeH_[0] * imgSizeW_[0] * size_t(numFilters_);
} }
return layerSize; return layerSize;
......
...@@ -78,8 +78,6 @@ protected: ...@@ -78,8 +78,6 @@ protected:
/// of output size. /// of output size.
bool caffeMode_; bool caffeMode_;
public: public:
explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {}
......
...@@ -31,14 +31,14 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, ...@@ -31,14 +31,14 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
* convTrans, and in other functions too. * convTrans, and in other functions too.
* */ * */
int channel; int channel;
int nf; int numFilters;
/* Initialize the projection */ /* Initialize the projection */
for (auto &inputConfig : config_.inputs()) { for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf(); const ConvConfig &conf = inputConfig.conv_conf();
nf = (!isDeconv_) ? numFilters_ : conf.channels(); numFilters = isDeconv_ ? conf.channels() : numFilters_;
subM_.push_back(nf / conf.groups()); subM_.push_back(numFilters / conf.groups());
subN_.push_back(conf.output_x() * conf.output_x()); subN_.push_back(conf.output_x() * conf.output_x());
channel = (!isDeconv_) ? conf.channels() : numFilters_; channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(channel * conf.filter_size() * conf.filter_size() / subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
conf.groups()); conf.groups());
/* Consistent caffe mode for multiple input */ /* Consistent caffe mode for multiple input */
...@@ -99,7 +99,7 @@ void ExpandConvBaseLayer::addUnsharedBias() { ...@@ -99,7 +99,7 @@ void ExpandConvBaseLayer::addUnsharedBias() {
void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx, void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
int inIdx) { int inIdx) {
int channel = (!isDeconv_) ? channels_[inIdx] : numFilters_; int channel = isDeconv_ ? numFilters_ : channels_[inIdx];
resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
real *imgData = image->getData() + startIdx * image->getWidth(); real *imgData = image->getData() + startIdx * image->getWidth();
...@@ -122,10 +122,10 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, ...@@ -122,10 +122,10 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
expandOneFrame(image, startIdx, inIdx); expandOneFrame(image, startIdx, inIdx);
int nf = (!isDeconv_) ? numFilters_ : channels_[inIdx]; int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;
real *outData = real *outData =
out->getData() + startIdx * subN * nf; out->getData() + startIdx * subN * numFilters;
real *wgtData = weights_[inIdx]->getW()->getData(); real *wgtData = weights_[inIdx]->getW()->getData();
real *expInData = expandInput_->getData(); real *expInData = expandInput_->getData();
...@@ -147,7 +147,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, ...@@ -147,7 +147,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image, void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
int inpIdx) { int inpIdx) {
int channel = (!isDeconv_) ? channels_[inpIdx] : numFilters_; int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];
int subM = subM_[inpIdx]; int subM = subM_[inpIdx];
int subN = subN_[inpIdx]; int subN = subN_[inpIdx];
......
...@@ -189,58 +189,55 @@ void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride, ...@@ -189,58 +189,55 @@ void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride,
} }
TEST(Layer, convTransLayerFwd2) { TEST(Layer, convTransLayerFwd2) {
size_t imgSize, output_x, stride, padding, filter_size;
MatrixPtr result; MatrixPtr result;
result = Matrix::create(1, 5 * 5, false, false);
imgSize = 5;
output_x = 1;
stride = 1;
padding = 0;
filter_size = 5;
result = Matrix::create(1, imgSize * imgSize, false, false);
result->zeroMem(); result->zeroMem();
result->add(1.0); result->add(1.0);
doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); doOneConvtTest(/* imgSize */ 5,
/* output_x */ 1,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 5,
result);
imgSize = 5;
output_x = 2;
stride = 1;
padding = 0;
filter_size = 4;
float resultData[] = {1, 2, 2, 2, 1, float resultData[] = {1, 2, 2, 2, 1,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
1, 2, 2, 2, 1}; 1, 2, 2, 2, 1};
result = Matrix::create(resultData, 1, imgSize * imgSize, false, false); result->setData(resultData);
doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); doOneConvtTest(/* imgSize */ 5,
/* output_x */ 2,
imgSize = 5; /* stride */ 1,
output_x = 2; /* padding */ 0,
stride = 2; /* filter_size */ 4,
padding = 1; result);
filter_size = 5;
float resultData2[] = {1, 2, 2, 2, 1, float resultData2[] = {1, 2, 2, 2, 1,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
2, 4, 4, 4, 2, 2, 4, 4, 4, 2,
1, 2, 2, 2, 1}; 1, 2, 2, 2, 1};
result = Matrix::create(resultData2, 1, imgSize * imgSize, false, false); result->setData(resultData2);
doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); doOneConvtTest(/* imgSize */ 5,
/* output_x */ 2,
imgSize = 5; /* stride */ 2,
output_x = 2; /* padding */ 1,
stride = 2; /* filter_size */ 5,
padding = 0; result);
filter_size = 3;
float resultData3[] = {1, 1, 2, 1, 1, float resultData3[] = {1, 1, 2, 1, 1,
1, 1, 2, 1, 1, 1, 1, 2, 1, 1,
2, 2, 4, 2, 2, 2, 2, 4, 2, 2,
1, 1, 2, 1, 1, 1, 1, 2, 1, 1,
1, 1, 2, 1, 1}; 1, 1, 2, 1, 1};
result = Matrix::create(resultData3, 1, imgSize * imgSize, false, false); result->setData(resultData3);
doOneConvtTest(imgSize, output_x, stride, padding, filter_size, result); doOneConvtTest(/* imgSize */ 5,
} /* output_x */ 2,
/* stride */ 2,
/* padding */ 0,
/* filter_size */ 3,
result);}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
......
...@@ -351,12 +351,10 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) { ...@@ -351,12 +351,10 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) {
TEST(Layer, convTransLayer) { TEST(Layer, convTransLayer) {
testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false); testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false);
/*
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
testConvLayer("exconv", trans= false, useGpu= true); testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ true);
testConvLayer("cudnn_conv", trans= false, useGpu= true); // testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
#endif #endif
*/
} }
TEST(Layer, blockExpandLayer) { TEST(Layer, blockExpandLayer) {
......
...@@ -1082,7 +1082,11 @@ def parse_norm(norm, input_layer_name, norm_conf): ...@@ -1082,7 +1082,11 @@ def parse_norm(norm, input_layer_name, norm_conf):
else: else:
norm_conf.scale /= norm.size ** 2 norm_conf.scale /= norm.size ** 2
def parse_conv(conv, input_layer_name, conv_conf): '''
caffe_mode: compute the output size using floor instead of ceil,
which is consistent with Caffe's and cuDNN's convention.
'''
def parse_conv(conv, input_layer_name, conv_conf, trans=False):
conv_conf.filter_size = conv.filter_size conv_conf.filter_size = conv.filter_size
conv_conf.filter_size_y = conv.filter_size_y conv_conf.filter_size_y = conv.filter_size_y
conv_conf.channels = conv.channels conv_conf.channels = conv.channels
...@@ -1093,49 +1097,41 @@ def parse_conv(conv, input_layer_name, conv_conf): ...@@ -1093,49 +1097,41 @@ def parse_conv(conv, input_layer_name, conv_conf):
conv_conf.groups = conv.groups conv_conf.groups = conv.groups
conv_conf.filter_channels = conv.channels / conv.groups conv_conf.filter_channels = conv.channels / conv.groups
conv_conf.caffe_mode = conv.caffe_mode conv_conf.caffe_mode = conv.caffe_mode
img_pixels = g_layer_map[input_layer_name].size / conv.channels if not trans:
print('channels=%d size=%d'%(conv.channels, img_pixels = g_layer_map[input_layer_name].size / conv.channels
g_layer_map[input_layer_name].size)) print('channels=%d size=%d'%(conv.channels,
conv_conf.img_size = int(img_pixels ** 0.5) g_layer_map[input_layer_name].size))
config_assert((conv_conf.img_size ** 2) == img_pixels, conv_conf.img_size = int(img_pixels ** 0.5)
("Input layer %s: Incorrect input image size %d for input " config_assert((conv_conf.img_size ** 2) == img_pixels,
+ "image pixels %d") ("Input layer %s: Incorrect input image size %d for input "
% (input_layer_name, conv_conf.img_size, img_pixels)) + "image pixels %d")
conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, % (input_layer_name, conv_conf.img_size, img_pixels))
conv_conf.padding, conv_conf.stride, if conv.caffe_mode:
conv_conf.caffe_mode) conv_conf.output_x = \
1 + int(math.floor((2 * conv.padding + conv_conf.img_size \
- conv.filter_size) / float(conv.stride)))
def parse_conv_trans(conv, input_layer_name, conv_conf, num_filters): else:
conv_conf.filter_size = conv.filter_size conv_conf.output_x = \
conv_conf.filter_size_y = conv.filter_size_y 1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \
conv_conf.channels = conv.channels - conv.filter_size) / float(conv.stride)))
conv_conf.padding = conv.padding
conv_conf.padding_y = conv.padding_y
conv_conf.stride = conv.stride
conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups
conv_conf.filter_channels = num_filters / conv.groups
conv_conf.caffe_mode = conv.caffe_mode
outputSize = g_layer_map[input_layer_name].size / conv.channels
print('channels=%d size=%d'%(conv.channels,
g_layer_map[input_layer_name].size))
conv_conf.output_x = int(outputSize ** 0.5)
config_assert((conv_conf.output_x ** 2) == outputSize,
("Input layer %s: Incorrect input image size %d for input "
+ "image pixels %d")
% (input_layer_name, conv_conf.output_x, outputSize))
if conv.caffe_mode:
conv_conf.img_size = \
(conv_conf.output_x - 1) * conv.stride \
+ conv.filter_size - 2 * conv.padding
else: else:
conv_conf.img_size = \ outputSize = g_layer_map[input_layer_name].size / conv.channels
(conv_conf.output_x - 2) * conv.stride \ print('channels=%d size=%d'%(conv.channels,
+ conv.filter_size - 2 * conv.padding + 1 g_layer_map[input_layer_name].size))
conv_conf.output_x = int(outputSize ** 0.5)
config_assert((conv_conf.output_x ** 2) == outputSize,
("Input layer %s: Incorrect input image size %d for input "
+ "image pixels %d")
% (input_layer_name, conv_conf.output_x, outputSize))
if conv.caffe_mode:
conv_conf.img_size = \
(conv_conf.output_x - 1) * conv.stride \
+ conv.filter_size - 2 * conv.padding
else:
conv_conf.img_size = \
(conv_conf.output_x - 2) * conv.stride \
+ conv.filter_size - 2 * conv.padding + 1
def parse_block_expand(block_expand, input_layer_name, block_expand_conf): def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels block_expand_conf.channels = block_expand.channels
...@@ -1685,10 +1681,11 @@ class ConvTransLayerBase(LayerBase): ...@@ -1685,10 +1681,11 @@ class ConvTransLayerBase(LayerBase):
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
parse_conv_trans( parse_conv(
self.inputs[input_index].conv, self.inputs[input_index].conv,
input_layer.name, input_layer.name,
self.config.inputs[input_index].conv_conf, num_filters) self.config.inputs[input_index].conv_conf, num_filters,
trans=True)
conv_conf = self.config.inputs[input_index].conv_conf conv_conf = self.config.inputs[input_index].conv_conf
psize = self.calc_parameter_size(conv_conf) psize = self.calc_parameter_size(conv_conf)
print("output size for %s is %d " % (name, conv_conf.output_x)) print("output size for %s is %d " % (name, conv_conf.output_x))
......
...@@ -36,7 +36,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", ...@@ -36,7 +36,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
"pooling_layer", "lstmemory", "last_seq", "first_seq", "pooling_layer", "lstmemory", "last_seq", "first_seq",
"cos_sim", "hsigmoid", "conv_projection", "cos_sim", "hsigmoid", "conv_projection",
"regression_cost", 'classification_cost', "LayerOutput", "regression_cost", 'classification_cost', "LayerOutput",
'img_conv_layer', 'img_convTrans_layer', 'img_pool_layer', 'batch_norm_layer', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer',
'img_cmrnorm_layer', 'addto_layer', 'img_cmrnorm_layer', 'addto_layer',
'concat_layer', 'lstm_step_layer', 'recurrent_group', 'concat_layer', 'lstm_step_layer', 'recurrent_group',
'memory', 'StaticInput', 'expand_layer', 'scaling_layer', 'memory', 'StaticInput', 'expand_layer', 'scaling_layer',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册