Commit e1f57bfd authored by luotao1, committed by emailweixu

add base class for seqlastin/max/average layer (#187)

Parent: 76fb74dc
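This commit factors the shared plumbing of the seqlastins, max, and average layers (single-input check, bias setup, trans_type parsing, start-position selection, output resizing, sequence degrading, and the activation/bias backward) into a new SequencePoolLayer base class. A minimal sketch of the resulting extension pattern, using a hypothetical SumPoolLayer that is not part of this commit and compiles only inside the layers directory:

#include "SequencePoolLayer.h"

namespace paddle {
// Hypothetical pooling layer, for illustration only: it delegates all
// sequence bookkeeping to SequencePoolLayer and implements just the
// reduction itself.
class SumPoolLayer : public SequencePoolLayer {
public:
  explicit SumPoolLayer(const LayerConfig& config)
      : SequencePoolLayer(config) {}

  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
    // Base init() checks the single input, creates biases_, and maps
    // trans_type ("non-seq"/"seq") onto type_.
    return SequencePoolLayer::init(layerMap, parameterMap);
  }

  void forward(PassType passType) {
    // Base forward() fills startPositions_ and newBatchSize_, resizes the
    // output to one row per (sub-)sequence, and degrades the sequence
    // level when type_ == kSeq.
    SequencePoolLayer::forward(passType);
    const int* starts = startPositions_->getData(false);
    (void)starts;  // reduce rows [starts[i], starts[i+1]) of the input
                   // into output row i here
  }

  void backward(const UpdateCallback& callback = nullptr) {
    // Base backward() runs backwardActivation() and the bias gradient.
    SequencePoolLayer::backward(callback);
    // scatter the output gradient back over each input span here
  }
};
}  // namespace paddle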
AverageLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "AverageLayer.h"
 #include "paddle/utils/Logging.h"
@@ -25,13 +24,8 @@ REGISTER_LAYER(average, AverageLayer);
 bool AverageLayer::init(const LayerMap& layerMap,
                         const ParameterMap& parameterMap) {
-  /* Initialize the basic parent class */
-  Layer::init(layerMap, parameterMap);
-  /* initialize biases_ */
-  if (biasParameter_.get() != NULL) {
-    biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
-  }
+  SequencePoolLayer::init(layerMap, parameterMap);
   dataMtx_ = Matrix::create(nullptr, 1, 1, false, useGpu_);
   outMtx_ = Matrix::create(nullptr, 1, getSize(), false, useGpu_);
   // average strategy
@@ -44,57 +38,15 @@ bool AverageLayer::init(const LayerMap& layerMap,
   } else {
     LOG(FATAL) << "Unknown average strategy: " << config_.average_strategy();
   }
-  // transform to which sequence type
-  if (config_.trans_type() == "non-seq") {
-    type_ = kNonSeq;
-  } else if (config_.trans_type() == "seq") {
-    type_ = kSeq;
-  } else {
-    LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
-  }
-  setNeedSequenceInfo(false);
   return true;
 }

 void AverageLayer::forward(PassType passType) {
-  Layer::forward(passType);
-  // average layer should have exactly 1 input
-  CHECK_EQ(1U, inputLayers_.size());
-  size_t dim = getSize();
-  const Argument& input = getInput(0);
-  CHECK(input.sequenceStartPositions);
-  if (type_) {
-    CHECK(input.subSequenceStartPositions)
-        << "when trans_type = seq, input must hasSubseq";
-  }
-  int64_t newBatchSize =
-      type_ ? input.getNumSubSequences() : input.getNumSequences();
-  ICpuGpuVectorPtr startPositions =
-      type_ ? input.subSequenceStartPositions
-            : input.sequenceStartPositions;
-  const int* starts = startPositions->getData(false);
-  size_t numSequences = startPositions->getSize() - 1;
-  // check
-  CHECK_EQ(numSequences, (size_t)newBatchSize);
-  CHECK_EQ(starts[numSequences], input.getBatchSize());
-  CHECK_EQ(dim, input.value->getWidth());
-  resetOutput(newBatchSize, dim);
-  auto startsPos = startPositions->getVector(useGpu_);
+  SequencePoolLayer::forward(passType);

   MatrixPtr inputValue = getInputValue(0);
-  getOutputValue()->sequenceAvgForward(*inputValue, *startsPos, mode_);
+  getOutputValue()->sequenceAvgForward(
+      *inputValue, *startPositions_->getVector(useGpu_), mode_);

-  /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
-   * thus, in this case, output_ has no sequenceStartPositions.
-   * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
-   * case, we should compute the new sequenceStartPositions.
-   */
-  if (type_) {
-    output_.degradeSequence(input, useGpu_);
-  }
   /* add the bias-vector AFTER average operation */
   if (biases_.get() != NULL) {
@@ -106,26 +58,16 @@ void AverageLayer::forward(PassType passType) {
 }

 void AverageLayer::backward(const UpdateCallback& callback) {
-  const Argument& input = getInput(0);
-  ICpuGpuVectorPtr startPositions =
-      type_ ? input.subSequenceStartPositions
-            : input.sequenceStartPositions;
-  const int* starts = startPositions->getData(false);
-  /* Do derivation */ { backwardActivation(); }
-  if (biases_ && biases_->getWGrad()) {
-    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
-    // Increasing the number of gradient
-    biases_->getParameterPtr()->incUpdate(callback);
-  }
+  SequencePoolLayer::backward(callback);
+  const int* starts = startPositions_->getData(false);
   MatrixPtr grad = getInputGrad(0);
   if (grad) {
     size_t dim = getSize();
     real* gradientData = getInputGrad(0)->getData();
     real* gradient = getOutputGrad()->getData();
-    size_t numSequences = startPositions->getSize() - 1;
+    size_t numSequences = startPositions_->getSize() - 1;
     for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
       // TODO(Dangqingqing) optimization for GPU
       int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
......
AverageLayer.h
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once

-#include "Layer.h"
+#include "SequencePoolLayer.h"
 #include "paddle/math/Matrix.h"

 namespace paddle {
@@ -23,20 +22,21 @@ namespace paddle {
 /**
  * A layer for "internal average" for sequence input.
  * Input: one or more sequences. Each sequence contains some instances.
- * If AverageLevel = kNonSeq:
+ * If SequenceLevel = kNonSeq:
  *   Output: output size is the number of input sequences (NOT input instances)
  *   output[i] = average_{for each instance in this sequence}{input[i]}
- * If AverageLevel = kSeq:
+ * If SequenceLevel = kSeq:
  *   Check: the input sequence must have sub-sequences
  *   Output: output size is the number of input sub-sequences
  *   output[i] = average_{for each instance in this sub-sequence}{input[i]}
+ *
+ * The config file api is pooling_layer.
  */
-class AverageLayer : public Layer {
+class AverageLayer : public SequencePoolLayer {
 public:
   enum AverageStrategy { kAverage = 0, kSum = 1, kAverageSquareRootN = 2 };
-  enum AverageLevel { kNonSeq = 0, kSeq = 1 };
-  explicit AverageLayer(const LayerConfig& config) : Layer(config) {}
+  explicit AverageLayer(const LayerConfig& config)
+      : SequencePoolLayer(config) {}
   ~AverageLayer() {}
@@ -46,11 +46,8 @@ public:
   void backward(const UpdateCallback& callback = nullptr);

 protected:
-  std::unique_ptr<Weight> biases_;
   MatrixPtr outMtx_;
   MatrixPtr dataMtx_;
   int mode_;
-  int type_;
 };
 }  // namespace paddle
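Each AverageStrategy reduces a span of input rows to one output row. A standalone sketch of the three modes over a single feature column (plain C++ rather than Paddle's Matrix::sequenceAvgForward; the sqrt(n) divisor for kAverageSquareRootN is an assumption based on the enum name):

#include <cmath>
#include <cstdio>
#include <vector>

// Standalone illustration of AverageLayer's strategies for one feature
// column. starts[] follows the layer's convention: sequence i covers
// input rows [starts[i], starts[i+1]).
enum AverageStrategy { kAverage = 0, kSum = 1, kAverageSquareRootN = 2 };

std::vector<float> poolColumn(const std::vector<float>& in,
                              const std::vector<int>& starts,
                              AverageStrategy mode) {
  std::vector<float> out;
  for (size_t i = 0; i + 1 < starts.size(); ++i) {
    int n = starts[i + 1] - starts[i];
    float sum = 0;
    for (int r = starts[i]; r < starts[i + 1]; ++r) sum += in[r];
    if (mode == kAverage) sum /= n;
    if (mode == kAverageSquareRootN) sum /= std::sqrt((float)n);  // assumed
    out.push_back(sum);
  }
  return out;
}

int main() {
  std::vector<float> in = {1, 2, 3, 4, 5};  // 5 instances
  std::vector<int> starts = {0, 2, 5};      // 2 sequences: rows [0,2), [2,5)
  for (float v : poolColumn(in, starts, kAverage)) printf("%g ", v);
  // prints: 1.5 4
  return 0;
}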
MaxLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "MaxLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
@@ -21,55 +20,11 @@ namespace paddle {
 REGISTER_LAYER(max, MaxLayer);

-bool MaxLayer::init(const LayerMap& layerMap,
-                    const ParameterMap& parameterMap) {
-  /* Initialize the basic parent class */
-  Layer::init(layerMap, parameterMap);
-  /* initialize biases_ */
-  if (biasParameter_.get() != NULL) {
-    biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
-  }
-  // transform to which sequence type
-  if (config_.trans_type() == "non-seq") {
-    type_ = kNonSeq;
-  } else if (config_.trans_type() == "seq") {
-    type_ = kSeq;
-  } else {
-    LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
-  }
-  setNeedSequenceInfo(false);
-  return true;
-}
-
 void MaxLayer::forward(PassType passType) {
-  Layer::forward(passType);
-  // max layer should have exactly 1 input
-  CHECK_EQ(1U, inputLayers_.size());
-  size_t dim = getSize();
-  const Argument& input = getInput(0);
-  int64_t newBatchSize =
-      type_ ? input.getNumSubSequences() : input.getNumSequences();
-  ICpuGpuVectorPtr startPositions =
-      type_ ? input.subSequenceStartPositions
-            : input.sequenceStartPositions;
-  auto starts = startPositions->getVector(useGpu_);
-  size_t numSequences = startPositions->getSize() - 1;
-  CHECK_EQ(dim, input.value->getWidth());
-  CHECK_EQ(numSequences, (size_t)newBatchSize);
-  CHECK_EQ(startPositions->getData(false)[numSequences], input.getBatchSize());
-  if (type_) {
-    // when trans_type = seq, input must hasSubseq
-    CHECK_EQ(input.hasSubseq(), 1UL);
-  }
-  // reset output: resize to "num of sequences", not "batch size".
-  resetOutput(newBatchSize, dim);
-  IVector::resizeOrCreate(maxIndex_, newBatchSize * dim, useGpu(deviceId_));
+  SequencePoolLayer::forward(passType);
+
+  IVector::resizeOrCreate(maxIndex_, newBatchSize_ * getSize(),
+                          useGpu(deviceId_));
   maxIndex_->zeroMem();

   MatrixPtr inputValue = getInputValue(0);
@@ -77,16 +32,8 @@ void MaxLayer::forward(PassType passType) {
   {
     REGISTER_TIMER_INFO("MaxLayerForward", getName().c_str());
-    outputValue->maxSequenceForward(*inputValue, *starts, *maxIndex_);
-  }
-
-  /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
-   * thus, in this case, output_ has no cpuSequenceStartPositions.
-   * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
-   * case, we should compute the new cpuSequenceStartPositions.
-   */
-  if (type_) {
-    output_.degradeSequence(input, useGpu_);
+    outputValue->maxSequenceForward(
+        *inputValue, *startPositions_->getVector(useGpu_), *maxIndex_);
   }

   if (config_.output_max_index()) {
@@ -104,24 +51,14 @@ void MaxLayer::forward(PassType passType) {
 void MaxLayer::backward(const UpdateCallback& callback) {
   CHECK(!config_.output_max_index())
       << "backward is not available when output_max_index is set";
-  /* Do derivation */ { backwardActivation(); }
-  if (biases_ && biases_->getWGrad()) {
-    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
-    // Increasing the number of gradient
-    biases_->getParameterPtr()->incUpdate(callback);
-  }
+  SequencePoolLayer::backward(callback);

   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
   if (inputGrad) {
-    ICpuGpuVectorPtr starts =
-        type_ ? getInput(0).subSequenceStartPositions
-              : getInput(0).sequenceStartPositions;
     REGISTER_TIMER_INFO("MaxLayerBackward", getName().c_str());
-    inputGrad->maxSequenceBackward(*outputGrad,
-                                   *(starts->getVector(useGpu_)), *maxIndex_);
+    inputGrad->maxSequenceBackward(
+        *outputGrad, *(startPositions_->getVector(useGpu_)), *maxIndex_);
   }
 }
......
MaxLayer.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once

-#include "Layer.h"
+#include "SequencePoolLayer.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/utils/ThreadLocal.h"
@@ -24,29 +24,30 @@ namespace paddle {
 /**
  * A layer for "internal max" for sequence input.
  * Input: one or more sequences. Each sequence contains some instances.
- * If MaxLevel = kNonSeq:
+ * If SequenceLevel = kNonSeq:
  *   Output: output size is the number of input sequences (NOT input instances)
  *   output[i] = max_{for each instance in this sequence}{input[i]}
- * If MaxLevel = kSeq:
+ * If SequenceLevel = kSeq:
  *   Check: the input sequence must have sub-sequences
  *   Output: output size is the number of input sub-sequences
  *   output[i] = max_{for each instance in this sub-sequence}{input[i]}
+ *
+ * The config file api is pooling_layer.
  */
-class MaxLayer : public Layer {
+class MaxLayer : public SequencePoolLayer {
 protected:
-  std::unique_ptr<Weight> biases_;
   // maxIndex_[i][j] = k : the value at (i, j) is from input[k].
   IVectorPtr maxIndex_;
-  int type_;

 public:
-  explicit MaxLayer(const LayerConfig& config) : Layer(config) {}
-  enum MaxLevel {kNonSeq = 0, kSeq = 1 };
+  explicit MaxLayer(const LayerConfig& config) : SequencePoolLayer(config) {}
   ~MaxLayer() {}

-  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
+    return SequencePoolLayer::init(layerMap, parameterMap);
+  }

   void forward(PassType passType);
   void backward(const UpdateCallback& callback = nullptr);
......
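The maxIndex_ buffer recorded in forward is what keeps the backward pass cheap: each output cell remembers which input row supplied its max, and the gradient flows only to that row. A standalone sketch of the idea for one feature column (an illustrative re-implementation, not Paddle's maxSequenceForward/maxSequenceBackward kernels):

#include <vector>

// Max pooling over sequence spans: sequence i covers rows
// [starts[i], starts[i+1]) of the input column.
void maxForward(const std::vector<float>& in, const std::vector<int>& starts,
                std::vector<float>& out, std::vector<int>& maxIndex) {
  size_t numSeqs = starts.size() - 1;
  out.assign(numSeqs, 0);
  maxIndex.assign(numSeqs, 0);
  for (size_t i = 0; i < numSeqs; ++i) {
    int best = starts[i];
    for (int r = starts[i]; r < starts[i + 1]; ++r) {
      if (in[r] > in[best]) best = r;
    }
    out[i] = in[best];
    maxIndex[i] = best;  // remembered for the backward pass
  }
}

void maxBackward(const std::vector<float>& outGrad,
                 const std::vector<int>& maxIndex,
                 std::vector<float>& inGrad) {
  // Only the argmax row of each span receives gradient; all others get zero.
  for (size_t i = 0; i < maxIndex.size(); ++i) {
    inGrad[maxIndex[i]] += outGrad[i];
  }
}

int main() {
  std::vector<float> in = {1, 5, 2, 9, 3};
  std::vector<int> starts = {0, 2, 5};  // spans [0,2) and [2,5)
  std::vector<float> out;
  std::vector<int> maxIndex;
  maxForward(in, starts, out, maxIndex);        // out = {5, 9}, maxIndex = {1, 3}
  std::vector<float> inGrad(in.size(), 0);
  maxBackward({1.0f, 1.0f}, maxIndex, inGrad);  // inGrad = {0, 1, 0, 1, 0}
  return 0;
}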
SequenceLastInstanceLayer.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "paddle/utils/Logging.h"

-#include "Layer.h"
+#include "SequencePoolLayer.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/utils/Stat.h"
@@ -30,19 +30,18 @@ namespace paddle {
  *   Check: the input sequence must have sub-sequences
  * Output: a sequence containing only the last instance of each sub-sequence
  *         of the input sequence
+ *
+ * The config file api is last_seq and first_seq.
  */
-class SequenceLastInstanceLayer : public Layer {
+class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
-  std::unique_ptr<Weight> biases_;
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
-  enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
-  int type_;

 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
-      : Layer(config) {}
+      : SequencePoolLayer(config) {}

   ~SequenceLastInstanceLayer() {}
@@ -56,56 +55,20 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);

 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
-  /* Initialize the basic parent class */
-  Layer::init(layerMap, parameterMap);
-  // seqlastins layer should have exactly 1 input
-  CHECK_EQ(1U, inputLayers_.size());
-  /* initialize biases_ */
-  if (biasParameter_.get() != NULL) {
-    biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
-  }
+  SequencePoolLayer::init(layerMap, parameterMap);

   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
   tmpDest_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);

-  // transform to which sequence type
-  if (config_.trans_type() == "non-seq") {
-    type_ = kNonSeq;
-  } else if (config_.trans_type() == "seq") {
-    type_ = kSeq;
-  } else {
-    LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
-  }
-  setNeedSequenceInfo(false);
   return true;
 }

 void SequenceLastInstanceLayer::forward(PassType passType) {
-  Layer::forward(passType);
-  size_t dim = getSize();
-  const Argument& input = getInput(0);
-  // check
-  CHECK(input.sequenceStartPositions);
-  if (type_) {
-    CHECK(input.subSequenceStartPositions)
-        << "when trans_type = seq, input must hasSubseq";
-  }
-  auto startPositions =
-      type_ ? input.subSequenceStartPositions->getVector(false)
-            : input.sequenceStartPositions->getVector(false);
-  size_t height = type_ ? input.getNumSubSequences() : input.getNumSequences();
-  CHECK_EQ(dim, input.value->getWidth());
-  CHECK_EQ(startPositions->getData()[height], input.getBatchSize());
-  CHECK_EQ(height, startPositions->getSize() - 1);
-  reserveOutput(height, dim);
-  const int* starts = startPositions->getData();
+  SequencePoolLayer::forward(passType);
+
+  const int* starts = startPositions_->getData(false);
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();
@@ -113,21 +76,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());

-    for (size_t seqId = 0; seqId < height; ++seqId) {
+    for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
       int insId =
           config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;

       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
     }
-
-    /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
-     * thus, in this case, output_ has no sequenceStartPositions.
-     * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
-     * case, we should compute the new sequenceStartPositions.
-     */
-    if (type_) {
-      output_.degradeSequence(input, useGpu_);
-    }
   }

   if (biases_.get() != NULL) {
@@ -139,23 +94,12 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
 }

 void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
-  /* activation, should set to 'linear' in most cases */
-  backwardActivation();
-  if (biases_ && biases_->getWGrad()) {
-    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
-    // Increasing the number of gradient
-    biases_->getParameterPtr()->incUpdate(callback);
-  }
+  SequencePoolLayer::backward(callback);

   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  auto startPositions =
-      type_ ? getInput(0).subSequenceStartPositions->getVector(false)
-            : getInput(0).sequenceStartPositions->getVector(false);
-  const int* starts = startPositions->getData();
-  size_t numSequences = startPositions->getSize() - 1;
+  const int* starts = startPositions_->getData(false);
+  size_t numSequences = startPositions_->getSize() - 1;

   if (inputGrad) {
     AsyncGpuBlock asyncGpuBlock;
......
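The instance selection is plain index arithmetic on the start-position array, as the loop in forward() shows: row starts[seqId + 1] - 1 for last_seq, or row starts[seqId] when select_first is set (first_seq). A concrete standalone example:

#include <cstdio>

int main() {
  // Three sequences over 9 input rows: [0,3), [3,5), [5,9).
  int starts[] = {0, 3, 5, 9};
  bool selectFirst = false;  // last_seq; true would correspond to first_seq
  for (int seqId = 0; seqId < 3; ++seqId) {
    int insId = selectFirst ? starts[seqId] : starts[seqId + 1] - 1;
    printf("output row %d <- input row %d\n", seqId, insId);
  }
  // last_seq picks rows 2, 4, 8; first_seq would pick rows 0, 3, 5.
  return 0;
}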
SequencePoolLayer.cpp (new file)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "SequencePoolLayer.h"
namespace paddle {
bool SequencePoolLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
// seqlastins/max/average layer should have exactly 1 input
CHECK_EQ(1U, inputLayers_.size());
/* initialize biases_ */
if (biasParameter_.get() != NULL) {
biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
// transform to which sequence type
if (config_.trans_type() == "non-seq") {
type_ = kNonSeq;
} else if (config_.trans_type() == "seq") {
type_ = kSeq;
} else {
LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
}
setNeedSequenceInfo(false);
return true;
}
void SequencePoolLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& input = getInput(0);
newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
size_t dim = getSize();
// check
CHECK_EQ(dim, input.value->getWidth());
startPositions_ =
type_ ? input.subSequenceStartPositions : input.sequenceStartPositions;
auto starts = startPositions_->getVector(false);
CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
CHECK_EQ(newBatchSize_, starts->getSize() - 1);
resetOutput(newBatchSize_, dim);
if (type_) {
CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
}
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
*/
if (type_) {
output_.degradeSequence(input, useGpu_);
}
}
void SequencePoolLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ { backwardActivation(); }
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
// Increasing the number of gradient
biases_->getParameterPtr()->incUpdate(callback);
}
}
} // namespace paddle
SequencePoolLayer.h (new file)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* A base layer for SequenceLastInstanceLayer/AverageLayer/MaxLayer.
*
* Input: one or more sequences. Each sequence contains some instances.
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = seqlastin/average/max_{for each instance in this
* sequence}{input[i]}
* If SequenceLevel = kSeq:
 *   Check: the input sequence must have sub-sequences
* Output: output size is the number of input sub-sequences
* output[i] = seqlastin/average/max_{for each instance in this
* sub-sequence}{input[i]}
*
* The config file api is pooling_layer.
*/
class SequencePoolLayer : public Layer {
protected:
int type_;
std::unique_ptr<Weight> biases_;
enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
size_t newBatchSize_;
ICpuGpuVectorPtr startPositions_;
public:
explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
virtual ~SequencePoolLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
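To make the kNonSeq/kSeq bookkeeping concrete: the base class only decides which start-position array drives the pooling and whether the output keeps sequence structure. A standalone sketch with toy boundaries (plain vectors in place of Paddle's Argument fields; the shape of the degraded start positions is an inference from how degradeSequence is used here):

#include <cstdio>
#include <vector>

int main() {
  // A batch of 9 instances: 2 sequences, split into 2 sub-sequences each.
  std::vector<int> seqStarts = {0, 4, 9};           // sequence boundaries
  std::vector<int> subSeqStarts = {0, 2, 4, 7, 9};  // sub-sequence boundaries

  // trans_type == "non-seq" (type_ = kNonSeq): pool whole sequences.
  // startPositions_ = seqStarts, so the output has 2 rows and is no
  // longer a sequence.
  printf("kNonSeq: %zu output rows\n", seqStarts.size() - 1);

  // trans_type == "seq" (type_ = kSeq): pool each sub-sequence.
  // startPositions_ = subSeqStarts, so the output has 4 rows, and output_
  // is degraded to a plain sequence with starts {0, 2, 4} (one pooled row
  // per sub-sequence, grouped by the original sequences).
  printf("kSeq: %zu output rows\n", subSeqStarts.size() - 1);
  return 0;
}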