提交 496d64eb 编写于 作者: L Luo Tao

Support rectangle input for CNN

上级 65612425
......@@ -61,15 +61,10 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
void BatchNormBaseLayer::calFeatureMapSize() {
const ImageConfig& conf = config_.inputs(0).image_conf();
if (inputLayers_[0]->getOutput().getFrameHeight() == 0 &&
inputLayers_[0]->getOutput().getFrameWidth() == 0) {
imgSize_ = conf.img_size();
imageH_ = imgSize_;
imageW_ = imgSize_;
} else {
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
}
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imageH_ == 0) imageH_ = conf.img_size_y();
if (imageW_ == 0) imageW_ = conf.img_size();
imgPixels_ = imageH_ * imageW_;
getOutput().setFrameHeight(imageH_);
getOutput().setFrameWidth(imageW_);
......
......@@ -78,9 +78,8 @@ protected:
MatrixPtr savedMean_;
MatrixPtr savedInvVar_;
/// Height or width of input image feature, now height is equal to width.
/// imgSize is 1 if the input is fully-connected layer.
int imgSize_;
/// Height and width of the input image feature map.
/// Both of them are 1 if the input is a fully-connected layer.
int imageH_;
int imageW_;
/// Height * Width.
......
......@@ -26,15 +26,15 @@ size_t BilinearInterpLayer::getSize() {
const BilinearInterpConfig& conf = config_.inputs(0).bilinear_interp_conf();
if (inImgH_ == 0) {
inImgH_ = conf.img_size_y();
inImgH_ = conf.image_conf().img_size_y();
}
if (inImgW_ == 0) {
inImgW_ = conf.img_size_x();
inImgW_ = conf.image_conf().img_size();
}
outImgH_ = conf.out_size_y();
outImgW_ = conf.out_size_x();
numChannels_ = conf.num_channels();
numChannels_ = conf.image_conf().channels();
CHECK(outImgH_ > 0 && outImgW_ > 0);
CHECK(inImgH_ > 0 && inImgW_ > 0);
......
......@@ -37,11 +37,13 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
channels_.push_back(conf.channels());
imgSizeH_.push_back(conf.img_size());
imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() :
conf.img_size());
imgSizeW_.push_back(conf.img_size());
groups_.push_back(conf.groups());
filterChannels_.push_back(conf.filter_channels());
outputH_.push_back(conf.output_x());
outputH_.push_back(conf.has_output_y() ? conf.output_y() :
conf.output_x());
outputW_.push_back(conf.output_x());
}
......@@ -90,11 +92,12 @@ size_t ConvBaseLayer::calOutputSize() {
for (size_t i = 0; i < inputLayers_.size(); i++) {
inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
const ConvConfig& conf = config_.inputs(i).conv_conf();
if (isDeconv_) {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().output_x();
inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().output_x();
inW[i] = conf.output_x();
outH.push_back(
imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
caffeMode_));
......@@ -103,9 +106,9 @@ size_t ConvBaseLayer::calOutputSize() {
caffeMode_));
} else {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().img_size();
inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().img_size();
inW[i] = conf.img_size();
outH.push_back(
outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
caffeMode_));
......
......@@ -93,9 +93,9 @@ private:
bool caffeMode_;
int inputOffset_, outputOffset_, weightOffset_;
int numFilters_;
int padding_, stride_, filterSize_, channels_, imgSize_;
int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_;
int paddingY_, strideY_, filterSizeY_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputs_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
/// Following member variables are same with CudnnConvLayer.
/// There is no explanation here.
......@@ -144,7 +144,7 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
void ConvOperator::reshape(int batchSize) {
imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth();
if (imageH_ == 0) imageH_ = imgSize_;
if (imageH_ == 0) imageH_ = imgSizeY_;
if (imageW_ == 0) imageW_ = imgSize_;
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
......@@ -176,7 +176,10 @@ void ConvOperator::computeConvSizes() {
hl_create_tensor_descriptor(&inputDesc_);
int outputX =
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
int outputY =
outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_);
CHECK_EQ(outputX, outputX_);
CHECK_EQ(outputY, outputY_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_,
paddingY_, padding_, strideY_, stride_);
......@@ -208,10 +211,12 @@ void ConvOperator::getConvParams() {
filterPixels_ = filterSize_ * filterSizeY_;
channels_ = conf.channels();
imgSize_ = conf.img_size();
imgPixels_ = imgSize_ * imgSize_;
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
imgPixels_ = imgSize_ * imgSizeY_;
CHECK_EQ(conf.groups(), 1U);
filterChannels_ = conf.filter_channels();
outputX_ = conf.output_x();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
outputs_ = outputX_ * outputX_;
}
......
......@@ -47,7 +47,7 @@ void ConvProjection::getConvParams() {
filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size();
configImgH_ = conf.img_size();
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
configImgW_ = conf.img_size();
channels_ = conf.channels();
......
......@@ -48,8 +48,8 @@ void DataLayer::copyDataToOutput(Argument& output) {
output.ids->copyFrom(*data_.ids);
}
}
output.setFrameHeight(data_.getFrameHeight());
output.setFrameWidth(data_.getFrameWidth());
output.setFrameHeight(config_.height());
output.setFrameWidth(config_.width());
output.cpuSequenceDims = data_.cpuSequenceDims;
output.sequenceStartPositions = data_.sequenceStartPositions;
output.subSequenceStartPositions = data_.subSequenceStartPositions;
......
......@@ -30,17 +30,19 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
* meaning as in conv, we need to swap channels_ and numFilters here for
* convTrans, and in other functions too.
* */
int channel;
int numFilters;
/* Initialize the projection */
for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf();
numFilters = isDeconv_ ? conf.channels() : numFilters_;
int numFilters = isDeconv_ ? conf.channels() : numFilters_;
subM_.push_back(numFilters / conf.groups());
subN_.push_back(conf.output_x() * conf.output_x());
channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
conf.groups());
subN_.push_back(conf.output_x() *
(conf.has_output_y() ? conf.output_y() : conf.output_x()));
int channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(
channel * conf.filter_size() *
(conf.has_filter_size_y() ? conf.filter_size_y() : conf.filter_size()) /
conf.groups());
/* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode();
}
......@@ -107,9 +109,9 @@ void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false,
useGpu_);
expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
channel, filterSize_[inIdx],
filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
padding_[inIdx], padding_[inIdx],
channel, filterSizeY_[inIdx],
filterSize_[inIdx], strideY_[inIdx], stride_[inIdx],
paddingY_[inIdx], padding_[inIdx],
outputH_[inIdx], outputW_[inIdx]);
imageTmp->clear();
}
......@@ -188,10 +190,10 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false,
useGpu_);
vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
channel, filterSize_[inpIdx],
filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
padding_[inpIdx], padding_[inpIdx],
outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
channel, filterSizeY_[inpIdx],
filterSize_[inpIdx], strideY_[inpIdx], stride_[inpIdx],
paddingY_[inpIdx], padding_[inpIdx], outputH_[inpIdx],
outputW_[inpIdx], 1.0f, 1.0f);
vTmp->clear();
oneGradTmp->clear();
......
......@@ -25,10 +25,10 @@ size_t MaxOutLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = maxoutConf.img_size_y();
imgSizeH_ = maxoutConf.image_conf().img_size_y();
}
if (imgSizeW_ == 0) {
imgSizeW_ = maxoutConf.img_size_x();
imgSizeW_ = maxoutConf.image_conf().img_size();
}
featLen_ = imgSizeH_ * imgSizeW_;
......@@ -50,7 +50,7 @@ bool MaxOutLayer::init(const LayerMap& layerMap,
const MaxOutConfig& conf = config_.inputs(0).maxout_conf();
groups_ = conf.groups();
channels_ = conf.channels();
channels_ = conf.image_conf().channels();
CHECK_EQ(channels_ % groups_, 0UL);
outputChannels_ = channels_ / groups_;
......
......@@ -49,6 +49,9 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
outputX_ = conf.output_x();
imgSize_ = conf.img_size();
denoms_ = NULL;
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
return true;
}
......
......@@ -50,7 +50,7 @@ public:
*/
class ResponseNormLayer : public NormLayer {
protected:
size_t channels_, size_, outputX_, imgSize_;
size_t channels_, size_, outputX_, imgSize_, outputY_, imgSizeY_;
float scale_, pow_;
MatrixPtr denoms_;
......
......@@ -24,7 +24,7 @@ size_t CMRProjectionNormLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_;
imgSizeH_ = imgSizeY_;
}
if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_;
......
......@@ -56,14 +56,14 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
size_t SpatialPyramidPoolLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
const SppConfig& sppConf = config_.inputs(0).spp_conf();
const ImageConfig& conf = config_.inputs(0).spp_conf().image_conf();
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
imgSizeH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
}
if (imgSizeW_ == 0) {
imgSizeW_ = sppConf.img_size();
imgSizeW_ = conf.img_size();
}
size_t outputH = 1;
......@@ -82,9 +82,10 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
pyramidHeight_ = sppConf.pyramid_height();
poolType_ = sppConf.pool_type();
channels_ = sppConf.channels();
imgSizeW_ = sppConf.img_size();
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
const ImageConfig& imageConf = sppConf.image_conf();
channels_ = imageConf.channels();
imgSizeW_ = imageConf.img_size();
imgSizeH_ = imageConf.has_img_size_y() ? imageConf.img_size_y() : imgSizeW_;
poolProjections_.reserve(pyramidHeight_);
projCol_.reserve(pyramidHeight_);
projOutput_.resize(pyramidHeight_);
......
......@@ -28,7 +28,6 @@ maxpool = img_pool_layer(input=conv,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
......@@ -39,7 +38,6 @@ avgpool = img_pool_layer(input=conv,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)
......
......@@ -194,9 +194,10 @@ TEST(Layer, BilinearInterpLayer) {
LayerInputConfig* input = config.layerConfig.add_inputs();
BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf();
bilinear->set_img_size_x(32);
bilinear->set_img_size_y(32);
bilinear->set_num_channels(4);
ImageConfig* image = bilinear->mutable_image_conf();
image->set_img_size(32);
image->set_img_size_y(32);
image->set_channels(4);
for (auto useGpu : {false, true}) {
for (auto outSize : {32, 64}) {
......@@ -314,7 +315,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 288});
config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288});
LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
......@@ -327,10 +328,14 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16);
conv->set_img_size_y(8);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_x() *
conv->set_output_y(outputSize(conv->img_size_y(), conv->filter_size_y(),
conv->padding_y(), conv->stride_y(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_y() *
config.layerConfig.num_filters());
testLayerGrad(config, "conv", 100, trans, useGpu);
......@@ -427,10 +432,11 @@ TEST(Layer, maxoutLayer) {
config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
MaxOutConfig* maxout = input->mutable_maxout_conf();
ImageConfig* image = maxout->mutable_image_conf();
maxout->set_img_size_x(32);
maxout->set_img_size_y(32);
maxout->set_channels(4);
image->set_img_size(32);
image->set_img_size_y(32);
image->set_channels(4);
maxout->set_groups(2);
for (auto useGpu : {false, true}) {
......@@ -902,7 +908,7 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) {
config.layerConfig.set_type("norm");
config.layerConfig.set_active_type("relu");
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0});
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
NormConfig* norm = input->mutable_norm_conf();
norm->set_norm_type(normType);
......@@ -912,7 +918,9 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) {
norm->set_pow(0.75);
norm->set_blocked(0);
norm->set_img_size(14);
norm->set_img_size_y(7);
norm->set_output_x(norm->img_size());
norm->set_output_y(norm->img_size_y());
if (norm->norm_type() == "cmrnorm" ||
norm->norm_type() == "cmrnorm-projection") {
norm->set_scale(norm->scale() / norm->size());
......@@ -920,7 +928,7 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) {
norm->set_scale(norm->scale() / (norm->size() * norm->size()));
}
config.layerConfig.set_size(norm->output_x() * norm->output_x() *
config.layerConfig.set_size(norm->output_x() * norm->output_y() *
norm->channels());
config.biasSize = 0;
......@@ -1018,11 +1026,12 @@ void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
SppConfig* sppConfig = input->mutable_spp_conf();
sppConfig->set_pool_type(poolType);
sppConfig->set_pyramid_height(pyramidHeight);
sppConfig->set_channels(16);
sppConfig->set_img_size(10);
sppConfig->set_img_size_y(20);
ImageConfig* imageConfig = sppConfig->mutable_image_conf();
imageConfig->set_channels(16);
imageConfig->set_img_size(10);
imageConfig->set_img_size_y(20);
int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1);
config.layerConfig.set_size(outputSize * sppConfig->channels());
config.layerConfig.set_size(outputSize * imageConfig->channels());
testLayerGrad(config, "spp", 100, trans, useGpu);
}
......@@ -1328,12 +1337,13 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
TestConfig config;
const int CHANNELS = 10;
const int IMG_SIZE = 16;
const int IMG_SIZE_Y = 8;
size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y;
config.layerConfig.set_type(type);
config.layerConfig.set_size(CHANNELS * IMG_SIZE * IMG_SIZE);
config.layerConfig.set_size(size);
config.layerConfig.set_active_type("sigmoid");
config.biasSize = CHANNELS;
config.inputDefs.push_back({INPUT_DATA, "layer_0",
/* dim= */ IMG_SIZE * IMG_SIZE * CHANNELS,
config.inputDefs.push_back({INPUT_DATA, "layer_0", /* dim= */ size,
/* paraSize= */ CHANNELS});
config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS});
......@@ -1348,6 +1358,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
ImageConfig* img_conf = input->mutable_image_conf();
img_conf->set_channels(CHANNELS);
img_conf->set_img_size(IMG_SIZE);
img_conf->set_img_size_y(IMG_SIZE_Y);
testLayerGrad(config, "batch_norm", 64, /* trans= */ trans, useGpu,
/* useWeight */ true);
......@@ -1370,6 +1381,7 @@ TEST(Operator, conv) {
const int FILTER_SIZE_Y = 3;
const int CHANNELS = 3;
const int IMAGE_SIZE = 16;
const int IMAGE_SIZE_Y = 8;
OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs();
operatorConf.set_type("conv");
ConvConfig* conv = operatorConf.mutable_conv_conf();
......@@ -1384,17 +1396,18 @@ TEST(Operator, conv) {
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE);
int output_x =
outputSize(conv->img_size(), conv->filter_size(), conv->padding(),
conv->stride(), /* caffeMode */ true);
conv->set_output_x(output_x);
config.layerConfig.set_size(output_x * output_x *
config.layerConfig.num_filters());
config.layerConfig.set_size(conv->output_x() * conv->output_x() *
conv->set_img_size_y(IMAGE_SIZE_Y);
conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(),
conv->padding(), conv->stride(),
/* caffeMode */ true));
conv->set_output_y(outputSize(conv->img_size_y(), conv->filter_size_y(),
conv->padding_y(), conv->stride_y(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_y() *
NUM_FILTERS);
config.inputDefs.push_back(
{INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE * CHANNELS, 0});
{INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0});
config.inputDefs.push_back(
{INPUT_DATA, "layer_1",
FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, 0});
......
......@@ -203,6 +203,8 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
}
resizeAndCopy(udp, src.udp, useGpu, stream);
resizeAndCopy(strs, src.strs, useGpu, stream);
frameWidth = src.frameWidth;
frameHeight = src.frameHeight;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
......
......@@ -59,7 +59,6 @@ pool = img_pool_layer(input=fc2,
padding_y=2,
stride=2,
stride_y=3,
img_width=3,
pool_type=CudnnAvgPooling())
concat = concat_layer(input=[fc3, fc4])
......
......@@ -76,6 +76,12 @@ message ConvConfig {
required uint32 filter_size_y = 10;
required uint32 padding_y = 11;
required uint32 stride_y = 12;
// if not set, use output_x
optional uint32 output_y = 13 [default = 0];
// if not set, use img_size
optional uint32 img_size_y = 14 [default = 0];
}
message PoolConfig {
......@@ -121,11 +127,9 @@ message PoolConfig {
}
message SppConfig {
required string pool_type = 1;
required uint32 pyramid_height = 2;
required uint32 channels = 3;
required uint32 img_size = 4;
optional uint32 img_size_y = 5;
required ImageConfig image_conf = 1;
required string pool_type = 2;
required uint32 pyramid_height = 3;
}
message NormConfig {
......@@ -155,6 +159,12 @@ message NormConfig {
// fixed window: shared a fixed window for each value
// sliding window: have a different window for each value
optional bool blocked = 8;
// if not set, use output_x
optional uint32 output_y = 9 [default = 0];
// if not set, use img_size
optional uint32 img_size_y = 10 [default = 0];
}
message BlockExpandConfig {
......@@ -179,12 +189,8 @@ message BlockExpandConfig {
}
message MaxOutConfig {
required uint32 channels = 1;
required ImageConfig image_conf = 1;
required uint32 groups = 2;
// The size of input feature map.
required uint32 img_size_x = 3;
required uint32 img_size_y = 4;
}
message ProjectionConfig {
......@@ -225,12 +231,10 @@ message OperatorConfig {
message BilinearInterpConfig {
// The size of input feature map.
optional uint32 img_size_x = 1;
optional uint32 img_size_y = 2;
required ImageConfig image_conf = 1;
// The size of output feature map.
required uint32 out_size_x = 3;
required uint32 out_size_y = 4;
required uint32 num_channels = 5;
required uint32 out_size_x = 2;
required uint32 out_size_y = 3;
}
message ImageConfig {
......@@ -240,6 +244,7 @@ message ImageConfig {
// The size of input feature map.
required uint32 img_size = 8;
required uint32 img_size_y = 9;
}
message LayerInputConfig {
......@@ -412,7 +417,10 @@ sinclude(`ModelConfigLayer.proto.m4')
// string type is used for flexibility: different types can be converted
// to string and reinterpreted in the user's own layer implementation.
optional string user_arg = 49;
// Height and width, used to indicate rectangular image data.
optional uint64 height = 50;
optional uint64 width = 51;
}
message EvaluatorConfig {
......
......@@ -763,7 +763,7 @@ def mixed_layer(size=0,
@layer_support()
def data_layer(name, size, layer_attr=None):
def data_layer(name, size, height=None, width=None, layer_attr=None):
"""
Define DataLayer For NeuralNetwork.
......@@ -778,6 +778,10 @@ def data_layer(name, size, layer_attr=None):
:type name: basestring
:param size: Size of this data layer.
:type size: int
:param height: Height of this data layer, used for image
:type height: int|None
:param width: Width of this data layer, used for image
:type width: int|None
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
......@@ -787,6 +791,8 @@ def data_layer(name, size, layer_attr=None):
type=LayerType.DATA,
name=name,
size=size,
height=height,
width=width,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.DATA, size=size)
......@@ -1480,7 +1486,7 @@ def bilinear_interp_layer(input,
bilinear_interp=BilinearInterp(
out_size_x=out_size_x,
out_size_y=out_size_y,
num_channels=num_channels)),
channels=num_channels)),
type=LayerType.BILINEAR_INTERP_LAYER,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
......@@ -1908,8 +1914,7 @@ def img_pool_layer(input,
layer_attr=None,
pool_size_y=None,
stride_y=None,
padding_y=None,
img_width=None):
padding_y=None):
"""
Image pooling Layer.
......@@ -1940,9 +1945,6 @@ def img_pool_layer(input,
:type stride_y: int|None
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:param img_width: the width of input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -1978,8 +1980,7 @@ def img_pool_layer(input,
padding=padding,
size_y=pool_size_y,
stride_y=stride_y,
padding_y=padding_y,
img_width=img_width))
padding_y=padding_y))
],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
......@@ -1997,7 +1998,6 @@ def spp_layer(input,
num_channels=None,
pool_type=None,
pyramid_height=None,
img_width=None,
layer_attr=None):
"""
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
......@@ -2014,9 +2014,6 @@ def spp_layer(input,
:type scale: BasePoolingType
:param pyramid_height: pyramid height.
:type pyramid_height: int
:param img_width: the width of input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
......@@ -2043,8 +2040,7 @@ def spp_layer(input,
spp=SpatialPyramidPool(
pool_type=type_name,
channels=num_channels,
pyramid_height=pyramid_height,
img_width=img_width)),
pyramid_height=pyramid_height)),
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
......
......@@ -26,11 +26,15 @@ layers {
filter_size_y: 32
padding_y: 1
stride_y: 1
output_y: 227
img_size_y: 256
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 64
shared_biases: true
height: 227
width: 227
}
layers {
name: "__batch_norm_0__"
......@@ -43,6 +47,7 @@ layers {
image_conf {
channels: 64
img_size: 227
img_size_y: 227
}
}
inputs {
......@@ -55,6 +60,8 @@ layers {
}
bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9
height: 227
width: 227
}
layers {
name: "__crmnorm_0__"
......@@ -72,8 +79,12 @@ layers {
output_x: 227
img_size: 227
blocked: false
output_y: 227
img_size_y: 227
}
}
height: 227
width: 227
}
layers {
name: "__pool_0__"
......@@ -97,6 +108,8 @@ layers {
padding_y: 0
}
}
height: 196
width: 196
}
parameters {
name: "___conv_0__.w0"
......
......@@ -26,6 +26,8 @@ layers {
filter_size_y: 32
padding_y: 1
stride_y: 1
output_y: 227
img_size_y: 198
}
}
bias_parameter_name: "___conv_0__.wbias"
......@@ -43,6 +45,7 @@ layers {
image_conf {
channels: 64
img_size: 256
img_size_y: 256
}
}
inputs {
......@@ -55,6 +58,8 @@ layers {
}
bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9
height: 256
width: 256
}
layers {
name: "__crmnorm_0__"
......@@ -72,8 +77,12 @@ layers {
output_x: 256
img_size: 256
blocked: false
output_y: 256
img_size_y: 256
}
}
height: 256
width: 256
}
layers {
name: "__pool_0__"
......@@ -97,6 +106,8 @@ layers {
padding_y: 0
}
}
height: 225
width: 225
}
parameters {
name: "___conv_0__.w0"
......
......@@ -177,6 +177,8 @@ layers {
filter_size_y: 3
padding_y: 0
stride_y: 1
output_y: 30
img_size_y: 32
}
num_filters: 64
}
......
......@@ -26,11 +26,15 @@ layers {
filter_size_y: 3
padding_y: 1
stride_y: 1
output_y: 48
img_size_y: 48
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 16
shared_biases: true
height: 48
width: 48
}
layers {
name: "__bilinear_interp_layer_0__"
......@@ -40,11 +44,17 @@ layers {
inputs {
input_layer_name: "__conv_0__"
bilinear_interp_conf {
image_conf {
channels: 16
img_size: 48
img_size_y: 48
}
out_size_x: 64
out_size_y: 64
num_channels: 16
}
}
height: 64
width: 64
}
layers {
name: "__pool_0__"
......@@ -55,19 +65,21 @@ layers {
input_layer_name: "__bilinear_interp_layer_0__"
pool_conf {
pool_type: "max-projection"
channels: 4
channels: 16
size_x: 2
stride: 2
output_x: 64
img_size: 128
output_x: 32
img_size: 64
padding: 0
size_y: 2
stride_y: 2
output_y: 64
img_size_y: 128
output_y: 32
img_size_y: 64
padding_y: 0
}
}
height: 32
width: 32
}
layers {
name: "__fc_layer_0__"
......@@ -78,6 +90,8 @@ layers {
input_layer_name: "__pool_0__"
input_parameter_name: "___fc_layer_0__.w0"
}
height: 32
width: 32
}
parameters {
name: "___conv_0__.w0"
......
......@@ -4,6 +4,8 @@ layers {
type: "data"
size: 2304
active_type: ""
height: 48
width: 48
}
layers {
name: "__conv_0__"
......@@ -26,11 +28,15 @@ layers {
filter_size_y: 3
padding_y: 1
stride_y: 1
output_y: 48
img_size_y: 48
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 16
shared_biases: true
height: 48
width: 48
}
layers {
name: "__maxout_layer_0__"
......@@ -40,12 +46,16 @@ layers {
inputs {
input_layer_name: "__conv_0__"
maxout_conf {
channels: 16
image_conf {
channels: 16
img_size: 48
img_size_y: 48
}
groups: 2
img_size_x: 0
img_size_y: 0
}
}
height: 48
width: 48
}
layers {
name: "__pool_0__"
......@@ -69,48 +79,58 @@ layers {
padding_y: 0
}
}
height: 24
width: 24
}
layers {
name: "__conv_1__"
type: "exconv"
size: 18432
size: 73728
active_type: ""
inputs {
input_layer_name: "__pool_0__"
input_parameter_name: "___conv_1__.w0"
conv_conf {
filter_size: 3
channels: 32
channels: 8
stride: 1
padding: 1
groups: 1
filter_channels: 32
output_x: 12
img_size: 12
filter_channels: 8
output_x: 24
img_size: 24
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 1
output_y: 24
img_size_y: 24
}
}
bias_parameter_name: "___conv_1__.wbias"
num_filters: 128
shared_biases: true
height: 24
width: 24
}
layers {
name: "__maxout_layer_1__"
type: "maxout"
size: 9216
size: 18432
active_type: ""
inputs {
input_layer_name: "__conv_0__"
input_layer_name: "__conv_1__"
maxout_conf {
channels: 128
image_conf {
channels: 128
img_size: 24
img_size_y: 24
}
groups: 4
img_size_x: 0
img_size_y: 0
}
}
height: 24
width: 24
}
layers {
name: "__block_expand_layer_0__"
......@@ -118,7 +138,7 @@ layers {
size: 192
active_type: ""
inputs {
input_layer_name: "__maxout_layer_0__"
input_layer_name: "__maxout_layer_1__"
block_expand_conf {
channels: 32
stride_x: 1
......@@ -133,6 +153,8 @@ layers {
img_size_y: 0
}
}
height: 24
width: 24
}
layers {
name: "__fc_layer_0__"
......@@ -143,6 +165,8 @@ layers {
input_layer_name: "__block_expand_layer_0__"
input_parameter_name: "___fc_layer_0__.w0"
}
height: 24
width: 24
}
parameters {
name: "___conv_0__.w0"
......@@ -164,9 +188,9 @@ parameters {
}
parameters {
name: "___conv_1__.w0"
size: 36864
size: 9216
initial_mean: 0.0
initial_std: 0.0833333333333
initial_std: 0.166666666667
initial_strategy: 0
initial_smart: false
}
......
......@@ -4,6 +4,8 @@ layers {
type: "data"
size: 3200
active_type: ""
height: 20
width: 10
}
layers {
name: "__spp_0__"
......@@ -13,13 +15,17 @@ layers {
inputs {
input_layer_name: "data"
spp_conf {
image_conf {
channels: 16
img_size: 10
img_size_y: 20
}
pool_type: "max-projection"
pyramid_height: 2
channels: 16
img_size: 10
img_size_y: 20
}
}
height: 1
width: 5
}
input_layer_names: "data"
output_layer_names: "__spp_0__"
......
......@@ -17,7 +17,7 @@ bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64)
pool = img_pool_layer(
input=bilinear,
num_channels=4,
num_channels=16,
pool_size=2,
stride=2,
pool_type=MaxPooling())
......
......@@ -2,7 +2,7 @@ from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2304)
data = data_layer(name='data', size=2304, height=48, width=48)
conv = img_conv_layer(
input=data,
......@@ -21,16 +21,21 @@ pool = img_pool_layer(
conv2 = img_conv_layer(
input=pool,
filter_size=3,
num_channels=32,
num_channels=8,
num_filters=128,
padding=1,
act=LinearActivation(),
bias_attr=True)
maxout2 = maxout_layer(input=conv, num_channels=128, groups=4)
maxout2 = maxout_layer(input=conv2, num_channels=128, groups=4)
block = block_expand_layer(
input=maxout, num_channels=32, stride_x=1, stride_y=1, block_x=1, block_y=6)
input=maxout2,
num_channels=32,
stride_x=1,
stride_y=1,
block_x=1,
block_y=6)
fc = fc_layer(input=block, size=384, bias_attr=False)
......
......@@ -2,13 +2,9 @@ from paddle.trainer_config_helpers import *
settings(batch_size=100, learning_rate=1e-5)
data = data_layer(name='data', size=3200)
data = data_layer(name='data', size=3200, height=20, width=10)
spp = spp_layer(
input=data,
pyramid_height=2,
num_channels=16,
pool_type=MaxPooling(),
img_width=10)
input=data, pyramid_height=2, num_channels=16, pool_type=MaxPooling())
outputs(spp)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册