Commit 7ed6463e, authored by yangyaming, committed by yangyaming

fix bugs for CrossChannelNormLayer

Parent 98378968
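For context: CrossChannelNormLayer is the cross-channel (per-position L2) normalization used by SSD-style detectors. Each spatial position is divided by the L2 norm of its values across channels and rescaled by a learned per-channel weight; this commit also moves the init() that builds that weight next to forward() and backward() (it was previously defined alongside ResponseNormLayer::init, as the second file below shows). A sketch of the forward computation, where ε stands for the small constant that the surrounding (collapsed) code appears to fold in through normTmp (an assumption; that initialization is not visible in this diff):

    y_{c,j} = \gamma_c \cdot \frac{x_{c,j}}{\sqrt{\sum_{k=1}^{C} x_{k,j}^2 + \epsilon}}

Here x_{c,j} is the input at channel c and spatial position j, C is channels_, and γ_c is the c-th entry of scale_. On that reading, the added spatialBuffer_->add(*normTmp) in the first forward hunk is what brings ε into the sum before the square root.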
@@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
       data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
 }
 
+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 void CrossChannelNormLayer::forward(PassType passType) {
   Layer::forward(passType);
   MatrixPtr inV = getInputValue(0);
@@ -63,6 +73,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
     // compute norm.
     spatialBuffer_->sumCols(*dataTmp, 1, 0);
+    spatialBuffer_->add(*normTmp);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
     outVTmp->copyFrom(*inVTmp);
@@ -82,6 +93,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
   size_t dataDim = inG->getWidth();
   size_t spatialDim = dataDim / channels_;
 
+  MatrixPtr inGBuffer;
+  Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_);
+
   dataBuffer_->dotMul(*outG, *outV);
   Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
   Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
@@ -100,22 +114,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
     scaleDiff_->add(*channelBuffer_, 1.);
 
     sampleBuffer_->dotMul(*inVTmp, *outGTmp);
-    spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
+    spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
     // scale the grad
-    inGTmp->copyFrom(*inVTmp);
-    inGTmp->mulRowVector(*spatialBuffer_);
+    inGBuffer->copyFrom(*inVTmp);
+    inGBuffer->mulRowVector(*spatialBuffer_);
     // divide by square of norm
     spatialBuffer_->dotMul(*normTmp, *normTmp);
-    inGTmp->divRowVector(*spatialBuffer_);
+    inGBuffer->divRowVector(*spatialBuffer_);
     // subtract
-    inGTmp->add(*outGTmp, -1, 1);
+    inGBuffer->add(*outGTmp, -1, 1);
     // divide by norm
-    inGTmp->divRowVector(*normTmp);
+    inGBuffer->divRowVector(*normTmp);
     // scale the diff
-    inGTmp->mulColVector(*scale_->getW());
+    inGBuffer->mulColVector(*scale_->getW());
+
+    inGTmp->add(*inGBuffer);
   }
   // updata scale
-  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
+  if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_);
   scale_->getParameterPtr()->incUpdate(callback);
 }
......
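The backward hunks carry the actual bug fixes. Writing n_j for the per-position norm computed in forward() and g = ∂L/∂y for the gradient arriving from above, the rewritten loop assembles its result in the new scratch matrix inGBuffer (copy x, multiply by the per-position dot product of x and g, divide by n_j², subtract from g, divide by n_j, multiply by the per-channel scale) and then adds it into the input gradient, where the old code overwrote whatever gradient was already there; the scale gradient gets the same treatment with copyFrom replaced by add. In formula form, this is the quantity the loop now accumulates (my transcription of the code, not text from the commit):

    \frac{\partial L}{\partial x_{c,j}} \mathrel{+}= \frac{\gamma_c}{n_j}\left(g_{c,j} - \frac{x_{c,j}\sum_{k} x_{k,j}\, g_{k,j}}{n_j^{2}}\right)

If γ varied across channels, the exact chain-rule gradient of y_{c,j} = γ_c x_{c,j} / n_j would weight the inner sum by γ_k; with a constant scale, as in the mean-1/std-0 initialization the updated test uses, the two coincide. The sumCols change from (1., 1.) to (1., 0.) presumably makes that reduction overwrite spatialBuffer_ rather than add to its previous contents, judging by the (1, 0) call used in forward().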
@@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool CrossChannelNormLayer::init(const LayerMap& layerMap,
-                                 const ParameterMap& parameterMap) {
-  Layer::init(layerMap, parameterMap);
-  CHECK(parameters_[0]);
-  const NormConfig& conf = config_.inputs(0).norm_conf();
-  channels_ = conf.channels();
-  scale_.reset(new Weight(channels_, 1, parameters_[0]));
-  return true;
-}
-
 }  // namespace paddle
@@ -465,7 +465,6 @@ void initTestLayer(TestConfig testConf,
                            ParameterConfig paraConfig) {
     paraConfig.set_name(paraName);
     paraConfig.set_size(paraSize);
-    paraConfig.set_initial_std(1);
     paraConfig.set_is_static(isStatic);
     auto para =
         std::make_shared<Parameter>(paraConfig, FLAGS_use_gpu, initialize);
@@ -499,6 +498,12 @@ void initTestLayer(TestConfig testConf,
      paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize());
      paraConfig.add_dims(testConf.layerConfig.size());
    }
+    if (testConf.hasParamInitialValue) {
+      paraConfig.set_initial_mean(testConf.paramInitialMean);
+      paraConfig.set_initial_std(testConf.paramInitialStd);
+    } else {
+      paraConfig.set_initial_std(1);
+    }
     initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig);
   }
 }
......
@@ -125,12 +125,18 @@ struct TestConfig {
   LayerConfig layerConfig;
   std::vector<InputDef> inputDefs;
   size_t biasSize;
+  real paramInitialMean;
+  real paramInitialStd;
+  bool hasParamInitialValue;
   bool testAccumulate;
   bool testState;
   bool staticBias;
   bool testBatchState;
   TestConfig()
       : biasSize(0),
+        paramInitialMean(0),
+        paramInitialStd(1),
+        hasParamInitialValue(false),
         testAccumulate(true),
         testState(false),
         staticBias(false),
......
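The harness change is opt-in: with hasParamInitialValue left at its default of false, initTestLayer keeps the old behaviour of initial_std = 1. A minimal sketch of how a test requests a fixed initialization (this simply mirrors the fields introduced above; the real test follows in the next file):

    TestConfig config;
    config.hasParamInitialValue = true;  // use the explicit mean/std below
    config.paramInitialMean = 1.;        // every scale weight starts at exactly 1.0
    config.paramInitialStd = 0.;         // no randomness in the initialization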
@@ -1661,6 +1661,9 @@ TEST(Layer, PadLayer) {
 TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
+  config.hasParamInitialValue = true;
+  config.paramInitialMean = 1.;
+  config.paramInitialStd = 0.;
   config.layerConfig.set_type("norm");
   config.layerConfig.set_size(100);
   LayerInputConfig* input = config.layerConfig.add_inputs();
@@ -1674,7 +1677,7 @@ TEST(Layer, CrossChannelNormLayer) {
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
+    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false);
   }
 }
......