From 7ed6463ee91e0b71e7beca313554eae36da1c4e4 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 24 May 2017 13:55:58 +0800
Subject: [PATCH] fix bugs for CrossChannelNormLayer

---
 .../gserver/layers/CrossChannelNormLayer.cpp  | 32 ++++++++++++++-----
 paddle/gserver/layers/NormLayer.cpp           | 10 ------
 paddle/gserver/tests/LayerGradUtil.cpp        |  7 +++-
 paddle/gserver/tests/LayerGradUtil.h          |  6 ++++
 paddle/gserver/tests/test_LayerGrad.cpp       |  5 ++-
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
index 3fbccc1103..4dfe460561 100644
--- a/paddle/gserver/layers/CrossChannelNormLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
       data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
 }
 
+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 void CrossChannelNormLayer::forward(PassType passType) {
   Layer::forward(passType);
   MatrixPtr inV = getInputValue(0);
@@ -63,6 +73,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
 
     // compute norm.
     spatialBuffer_->sumCols(*dataTmp, 1, 0);
+    spatialBuffer_->add(*normTmp);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
     outVTmp->copyFrom(*inVTmp);
@@ -82,6 +93,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
   size_t dataDim = inG->getWidth();
   size_t spatialDim = dataDim / channels_;
 
+  MatrixPtr inGBuffer;
+  Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_);
+
   dataBuffer_->dotMul(*outG, *outV);
   Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
   Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
@@ -100,22 +114,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
     scaleDiff_->add(*channelBuffer_, 1.);
 
     sampleBuffer_->dotMul(*inVTmp, *outGTmp);
-    spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
+    spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
     // scale the grad
-    inGTmp->copyFrom(*inVTmp);
-    inGTmp->mulRowVector(*spatialBuffer_);
+    inGBuffer->copyFrom(*inVTmp);
+    inGBuffer->mulRowVector(*spatialBuffer_);
     // divide by square of norm
     spatialBuffer_->dotMul(*normTmp, *normTmp);
-    inGTmp->divRowVector(*spatialBuffer_);
+    inGBuffer->divRowVector(*spatialBuffer_);
     // subtract
-    inGTmp->add(*outGTmp, -1, 1);
+    inGBuffer->add(*outGTmp, -1, 1);
     // divide by norm
-    inGTmp->divRowVector(*normTmp);
+    inGBuffer->divRowVector(*normTmp);
     // scale the diff
-    inGTmp->mulColVector(*scale_->getW());
+    inGBuffer->mulColVector(*scale_->getW());
+
+    inGTmp->add(*inGBuffer);
   }
   // updata scale
-  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
+  if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_);
   scale_->getParameterPtr()->incUpdate(callback);
 }
 
diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp
index e094078bfe..caef710092 100644
--- a/paddle/gserver/layers/NormLayer.cpp
+++ b/paddle/gserver/layers/NormLayer.cpp
@@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool CrossChannelNormLayer::init(const LayerMap& layerMap,
-                                 const ParameterMap& parameterMap) {
-  Layer::init(layerMap, parameterMap);
-  CHECK(parameters_[0]);
-  const NormConfig& conf = config_.inputs(0).norm_conf();
-  channels_ = conf.channels();
-  scale_.reset(new Weight(channels_, 1, parameters_[0]));
-  return true;
-}
-
 }  // namespace paddle

diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index e3591ba4df..66aafba844 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -465,7 +465,6 @@ void initTestLayer(TestConfig testConf,
                            ParameterConfig paraConfig) {
     paraConfig.set_name(paraName);
     paraConfig.set_size(paraSize);
-    paraConfig.set_initial_std(1);
     paraConfig.set_is_static(isStatic);
     auto para =
         std::make_shared<Parameter>(paraConfig, FLAGS_use_gpu, initialize);
@@ -499,6 +498,12 @@ void initTestLayer(TestConfig testConf,
       paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize());
       paraConfig.add_dims(testConf.layerConfig.size());
     }
+    if (testConf.hasParamInitialValue) {
+      paraConfig.set_initial_mean(testConf.paramInitialMean);
+      paraConfig.set_initial_std(testConf.paramInitialStd);
+    } else {
+      paraConfig.set_initial_std(1);
+    }
     initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig);
   }
 }

diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 18a6525a14..5ea7ca0f24 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -125,12 +125,18 @@ struct TestConfig {
   LayerConfig layerConfig;
   std::vector<InputDef> inputDefs;
   size_t biasSize;
+  real paramInitialMean;
+  real paramInitialStd;
+  bool hasParamInitialValue;
   bool testAccumulate;
   bool testState;
   bool staticBias;
   bool testBatchState;
   TestConfig()
       : biasSize(0),
+        paramInitialMean(0),
+        paramInitialStd(1),
+        hasParamInitialValue(false),
         testAccumulate(true),
         testState(false),
         staticBias(false),

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 59d1e9273d..6441e08b48 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1661,6 +1661,9 @@ TEST(Layer, PadLayer) {
 
 TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
+  config.hasParamInitialValue = true;
+  config.paramInitialMean = 1.;
+  config.paramInitialStd = 0.;
   config.layerConfig.set_type("norm");
   config.layerConfig.set_size(100);
   LayerInputConfig* input = config.layerConfig.add_inputs();
@@ -1674,7 +1677,7 @@ TEST(Layer, CrossChannelNormLayer) {
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
+    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false);
  }
 }
-- 
GitLab
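
Reviewer note: the hunks above are easiest to check against the math they implement. The sketch below is inferred from the diff alone; in particular, the epsilon term folded in by `spatialBuffer_->add(*normTmp)` assumes normTmp is pre-seeded with a small constant earlier in forward(), which lies outside the changed lines. For each sample and spatial position s, with input x, per-channel scale gamma, and output gradient g:

\[
n_s = \sqrt{\varepsilon + \sum_{c} x_{c,s}^{2}}, \qquad
y_{c,s} = \gamma_c \, \frac{x_{c,s}}{n_s}
\]
\[
\frac{\partial L}{\partial x_{c,s}}
  = \frac{\gamma_c}{n_s}\Bigl(g_{c,s} - \frac{x_{c,s}}{n_s^{2}} \sum_{k} x_{k,s}\, g_{k,s}\Bigr), \qquad
\frac{\partial L}{\partial \gamma_c} = \sum_{s} g_{c,s}\, \frac{x_{c,s}}{n_s}
\]

The behavioral fixes map onto this directly: the sumCols beta argument changing from 1. to 0. makes the per-position dot product overwrite the buffer instead of accumulating stale sums across loop iterations; the input gradient is now assembled in the scratch inGBuffer and added into inGTmp, so any gradient already present is accumulated rather than clobbered, and scaleDiff_ is likewise added into the weight gradient instead of copied over it. The test change pins gamma to a constant 1 (initial mean 1, std 0) so the gradient check exercises the normalization itself rather than a random scale.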