提交 e1f57bfd 编写于 作者: L luotao1 提交者: emailweixu

Add a base class for the seqlastins/max/average layers (#187)

上级 76fb74dc
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "AverageLayer.h"
#include "paddle/utils/Logging.h"
......@@ -25,13 +24,8 @@ REGISTER_LAYER(average, AverageLayer);
bool AverageLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
SequencePoolLayer::init(layerMap, parameterMap);
/* initialize biases_ */
if (biasParameter_.get() != NULL) {
biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
dataMtx_ = Matrix::create(nullptr, 1, 1, false, useGpu_);
outMtx_ = Matrix::create(nullptr, 1, getSize(), false, useGpu_);
// average strategy
......@@ -44,57 +38,15 @@ bool AverageLayer::init(const LayerMap& layerMap,
} else {
LOG(FATAL) << "Unknown average strategy: " << config_.average_strategy();
}
// transform to which sequence type
if (config_.trans_type() == "non-seq") {
type_ = kNonSeq;
} else if (config_.trans_type() == "seq") {
type_ = kSeq;
} else {
LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
}
setNeedSequenceInfo(false);
return true;
}
void AverageLayer::forward(PassType passType) {
Layer::forward(passType);
// average layer should have exactly 1 input
CHECK_EQ(1U, inputLayers_.size());
SequencePoolLayer::forward(passType);
size_t dim = getSize();
const Argument& input = getInput(0);
CHECK(input.sequenceStartPositions);
if (type_) {
CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
}
int64_t newBatchSize =
type_ ? input.getNumSubSequences() : input.getNumSequences();
ICpuGpuVectorPtr startPositions =
type_ ? input.subSequenceStartPositions
: input.sequenceStartPositions;
const int* starts = startPositions->getData(false);
size_t numSequences = startPositions->getSize() - 1;
// check
CHECK_EQ(numSequences, (size_t)newBatchSize);
CHECK_EQ(starts[numSequences], input.getBatchSize());
CHECK_EQ(dim, input.value->getWidth());
resetOutput(newBatchSize, dim);
auto startsPos = startPositions->getVector(useGpu_);
MatrixPtr inputValue = getInputValue(0);
getOutputValue()->sequenceAvgForward(*inputValue, *startsPos, mode_);
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
*/
if (type_) {
output_.degradeSequence(input, useGpu_);
}
getOutputValue()->sequenceAvgForward(
*inputValue, *startPositions_->getVector(useGpu_), mode_);
/* add the bias-vector AFTER average operation */
if (biases_.get() != NULL) {
......@@ -106,26 +58,16 @@ void AverageLayer::forward(PassType passType) {
}
void AverageLayer::backward(const UpdateCallback& callback) {
const Argument& input = getInput(0);
ICpuGpuVectorPtr startPositions =
type_ ? input.subSequenceStartPositions
: input.sequenceStartPositions;
const int* starts = startPositions->getData(false);
/* Do derivation */ { backwardActivation(); }
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
// Increasing the number of gradient
biases_->getParameterPtr()->incUpdate(callback);
}
SequencePoolLayer::backward(callback);
const int* starts = startPositions_->getData(false);
MatrixPtr grad = getInputGrad(0);
if (grad) {
size_t dim = getSize();
real* gradientData = getInputGrad(0)->getData();
real* gradient = getOutputGrad()->getData();
size_t numSequences = startPositions->getSize() - 1;
size_t numSequences = startPositions_->getSize() - 1;
for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
// TODO(Dangqingqing) optimization for GPU
int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
......
......@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "SequencePoolLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......@@ -23,20 +22,21 @@ namespace paddle {
/**
* A layer for "internal average" for sequence input.
* Input: one or more sequences. Each sequence contains some instances.
* If AverageLevel = kNonSeq:
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = average_{for each instance in this sequence}{input[i]}
* If AverageLevel = kSeq:
* If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences
* output[i] = average_{for each instance in this sub-sequence}{input[i]}
*
* The config file api is pooling_layer.
*/
class AverageLayer : public Layer {
class AverageLayer : public SequencePoolLayer {
public:
enum AverageStrategy { kAverage = 0, kSum = 1, kAverageSquareRootN = 2 };
enum AverageLevel { kNonSeq = 0, kSeq = 1 };
explicit AverageLayer(const LayerConfig& config) : Layer(config) {}
explicit AverageLayer(const LayerConfig& config)
: SequencePoolLayer(config) {}
~AverageLayer() {}
......@@ -46,11 +46,8 @@ public:
void backward(const UpdateCallback& callback = nullptr);
protected:
std::unique_ptr<Weight> biases_;
MatrixPtr outMtx_;
MatrixPtr dataMtx_;
int mode_;
int type_;
};
} // namespace paddle
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
......@@ -21,55 +20,11 @@ namespace paddle {
REGISTER_LAYER(max, MaxLayer);
bool MaxLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
/* initialize biases_ */
if (biasParameter_.get() != NULL) {
biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
// transform to which sequence type
if (config_.trans_type() == "non-seq") {
type_ = kNonSeq;
} else if (config_.trans_type() == "seq") {
type_ = kSeq;
} else {
LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
}
setNeedSequenceInfo(false);
return true;
}
void MaxLayer::forward(PassType passType) {
Layer::forward(passType);
// max layer should have exactly 1 input
CHECK_EQ(1U, inputLayers_.size());
size_t dim = getSize();
const Argument& input = getInput(0);
int64_t newBatchSize =
type_ ? input.getNumSubSequences() : input.getNumSequences();
ICpuGpuVectorPtr startPositions =
type_ ? input.subSequenceStartPositions
: input.sequenceStartPositions;
auto starts = startPositions->getVector(useGpu_);
size_t numSequences = startPositions->getSize() - 1;
SequencePoolLayer::forward(passType);
CHECK_EQ(dim, input.value->getWidth());
CHECK_EQ(numSequences, (size_t)newBatchSize);
CHECK_EQ(startPositions->getData(false)[numSequences], input.getBatchSize());
if (type_) {
// when trans_type = seq, input must hasSubseq
CHECK_EQ(input.hasSubseq(), 1UL);
}
// reset output: resize to "num of sequences", not "batch size".
resetOutput(newBatchSize, dim);
IVector::resizeOrCreate(maxIndex_, newBatchSize * dim, useGpu(deviceId_));
IVector::resizeOrCreate(maxIndex_, newBatchSize_ * getSize(),
useGpu(deviceId_));
maxIndex_->zeroMem();
MatrixPtr inputValue = getInputValue(0);
......@@ -77,16 +32,8 @@ void MaxLayer::forward(PassType passType) {
{
REGISTER_TIMER_INFO("MaxLayerForward", getName().c_str());
outputValue->maxSequenceForward(*inputValue, *starts, *maxIndex_);
}
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no cpuSequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new cpuSequenceStartPositions.
*/
if (type_) {
output_.degradeSequence(input, useGpu_);
outputValue->maxSequenceForward(
*inputValue, *startPositions_->getVector(useGpu_), *maxIndex_);
}
if (config_.output_max_index()) {
......@@ -104,24 +51,14 @@ void MaxLayer::forward(PassType passType) {
void MaxLayer::backward(const UpdateCallback& callback) {
CHECK(!config_.output_max_index())
<< "backward is not available when output_max_index is set";
/* Do derivation */ { backwardActivation(); }
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
// Increasing the number of gradient
biases_->getParameterPtr()->incUpdate(callback);
}
SequencePoolLayer::backward(callback);
MatrixPtr inputGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
if (inputGrad) {
ICpuGpuVectorPtr starts =
type_ ? getInput(0).subSequenceStartPositions
: getInput(0).sequenceStartPositions;
REGISTER_TIMER_INFO("MaxLayerBackward", getName().c_str());
inputGrad->maxSequenceBackward(*outputGrad,
*(starts->getVector(useGpu_)), *maxIndex_);
inputGrad->maxSequenceBackward(
*outputGrad, *(startPositions_->getVector(useGpu_)), *maxIndex_);
}
}
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include "Layer.h"
#include "SequencePoolLayer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/ThreadLocal.h"
......@@ -24,29 +24,30 @@ namespace paddle {
/**
* A layer for "internal max" for sequence input.
* Input: one or more sequences. Each sequence contains some instances.
* If MaxLevel = kNonSeq:
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]}
* If MaxLevel = kSeq:
* If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences
* output[i] = max_{for each instance in this sub-sequence}{input[i]}
*
* The config file api is pooling_layer.
*/
class MaxLayer : public Layer {
class MaxLayer : public SequencePoolLayer {
protected:
std::unique_ptr<Weight> biases_;
// maxIndex_[i][j] = k : the value at (i, j) is from input[k].
IVectorPtr maxIndex_;
int type_;
public:
explicit MaxLayer(const LayerConfig& config) : Layer(config) {}
enum MaxLevel {kNonSeq = 0, kSeq = 1 };
explicit MaxLayer(const LayerConfig& config) : SequencePoolLayer(config) {}
~MaxLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
return SequencePoolLayer::init(layerMap, parameterMap);
}
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "SequencePoolLayer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Stat.h"
......@@ -29,20 +29,19 @@ namespace paddle {
* If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence
* Output: a sequence containing only the last instance of each sub-sequence
* of the input sequence
* of the input sequence
*
* The config file api is last_seq and first_seq.
*/
class SequenceLastInstanceLayer : public Layer {
class SequenceLastInstanceLayer : public SequencePoolLayer {
protected:
std::unique_ptr<Weight> biases_;
MatrixPtr tmpSrc_;
MatrixPtr tmpDest_;
enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
int type_;
public:
explicit SequenceLastInstanceLayer(const LayerConfig& config)
: Layer(config) {}
: SequencePoolLayer(config) {}
~SequenceLastInstanceLayer() {}
......@@ -56,56 +55,20 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
// seqlastins layer should have exactly 1 input
CHECK_EQ(1U, inputLayers_.size());
/* initialize biases_ */
if (biasParameter_.get() != NULL) {
biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
SequencePoolLayer::init(layerMap, parameterMap);
tmpSrc_ =
Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
tmpDest_ =
Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
// transform to which sequence type
if (config_.trans_type() == "non-seq") {
type_ = kNonSeq;
} else if (config_.trans_type() == "seq") {
type_ = kSeq;
} else {
LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
}
setNeedSequenceInfo(false);
return true;
}
void SequenceLastInstanceLayer::forward(PassType passType) {
Layer::forward(passType);
size_t dim = getSize();
const Argument& input = getInput(0);
SequencePoolLayer::forward(passType);
// check
CHECK(input.sequenceStartPositions);
if (type_) {
CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
}
auto startPositions =
type_ ? input.subSequenceStartPositions->getVector(false)
: input.sequenceStartPositions->getVector(false);
size_t height = type_ ? input.getNumSubSequences() : input.getNumSequences();
CHECK_EQ(dim, input.value->getWidth());
CHECK_EQ(startPositions->getData()[height], input.getBatchSize());
CHECK_EQ(height, startPositions->getSize() - 1);
reserveOutput(height, dim);
const int* starts = startPositions->getData();
const int* starts = startPositions_->getData(false);
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();
......@@ -113,21 +76,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
AsyncGpuBlock asyncGpuBlock;
REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());
for (size_t seqId = 0; seqId < height; ++seqId) {
for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
int insId =
config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
outputValue->subMatrix(seqId, 1, tmpDest_)
->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
}
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
*/
if (type_) {
output_.degradeSequence(input, useGpu_);
}
}
if (biases_.get() != NULL) {
......@@ -139,23 +94,12 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
}
void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
/* activation, should set to 'linear' in most cases */
backwardActivation();
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
// Increasing the number of gradient
biases_->getParameterPtr()->incUpdate(callback);
}
SequencePoolLayer::backward(callback);
MatrixPtr inputGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
auto startPositions =
type_ ? getInput(0).subSequenceStartPositions->getVector(false)
: getInput(0).sequenceStartPositions->getVector(false);
const int* starts = startPositions->getData();
size_t numSequences = startPositions->getSize() - 1;
const int* starts = startPositions_->getData(false);
size_t numSequences = startPositions_->getSize() - 1;
if (inputGrad) {
AsyncGpuBlock asyncGpuBlock;
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "SequencePoolLayer.h"
namespace paddle {
/**
 * Initialization shared by the seqlastins/max/average layers.
 *
 * Runs Layer::init, requires exactly one input layer, creates the bias
 * weight when a bias parameter is present, and translates the configured
 * trans_type string ("non-seq" or "seq") into type_. Any other trans_type
 * value is reported through LOG(FATAL).
 *
 * @param layerMap     passed through to Layer::init.
 * @param parameterMap passed through to Layer::init.
 * @return always true when the function returns.
 */
bool SequencePoolLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  // Base-class setup comes first.
  Layer::init(layerMap, parameterMap);

  // seqlastins/max/average layer should have exactly 1 input
  CHECK_EQ(1U, inputLayers_.size());

  // The bias weight is 1 x getSize() and only exists when a bias parameter
  // was supplied.
  if (biasParameter_.get() != nullptr) {
    biases_.reset(new Weight(1, getSize(), biasParameter_));
  }

  // Map the configured trans_type string onto a SequenceLevel value.
  const auto& transType = config_.trans_type();
  if (transType == "seq") {
    type_ = kSeq;
  } else if (transType == "non-seq") {
    type_ = kNonSeq;
  } else {
    LOG(FATAL) << "Unknown trans_type: " << transType;
  }

  setNeedSequenceInfo(false);
  return true;
}
/**
 * Forward step shared by the seqlastins/max/average layers.
 *
 * Selects the start positions to pool over (sub-sequence starts when type_
 * is kSeq, sequence starts otherwise), validates them against the input,
 * and calls resetOutput(newBatchSize_, dim). The selection is stored in
 * startPositions_ and the row count in newBatchSize_ so that code running
 * after this call can use them.
 *
 * @param passType forwarded unchanged to Layer::forward.
 */
void SequencePoolLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);

  // Validate the start-position vectors BEFORE anything dereferences them,
  // so a missing vector is reported by CHECK instead of a null access.
  CHECK(input.sequenceStartPositions);
  if (type_) {
    CHECK(input.subSequenceStartPositions)
        << "when trans_type = seq, input must hasSubseq";
  }

  newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
  startPositions_ =
      type_ ? input.subSequenceStartPositions : input.sequenceStartPositions;

  size_t dim = getSize();
  // check
  CHECK_EQ(dim, input.value->getWidth());

  auto starts = startPositions_->getVector(false);
  // Check the length first: it guarantees that index newBatchSize_ used on
  // the next line is inside the vector.
  CHECK_EQ(newBatchSize_, starts->getSize() - 1);
  CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());

  resetOutput(newBatchSize_, dim);

  /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
   * thus, in this case, output_ has no sequenceStartPositions.
   * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
   * case, we should compute the new sequenceStartPositions.
   */
  if (type_) {
    output_.degradeSequence(input, useGpu_);
  }
}
/**
 * Backward step shared by the seqlastins/max/average layers.
 *
 * Calls backwardActivation(), then, if a bias weight with a gradient exists,
 * collects the output gradient into the bias gradient and notifies the
 * parameter through incUpdate.
 *
 * @param callback passed to incUpdate on the bias parameter.
 */
void SequencePoolLayer::backward(const UpdateCallback& callback) {
  // Do derivation
  backwardActivation();

  // Nothing more to do without a bias weight.
  if (!biases_) {
    return;
  }

  // Nothing more to do when the bias has no gradient.
  if (!biases_->getWGrad()) {
    return;
  }

  biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
  // Increasing the number of gradient
  biases_->getParameterPtr()->incUpdate(callback);
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * A base layer for SequenceLastInstanceLayer/AverageLayer/MaxLayer.
 *
 * Input: one or more sequences. Each sequence contains some instances.
 * If SequenceLevel = kNonSeq:
 *    Output: output size is the number of input sequences (NOT input instances)
 *    output[i] = seqlastins/average/max_{for each instance in this
 *                sequence}{input[i]}
 * If SequenceLevel = kSeq:
 *    The input sequence must have sub-sequences (checked in forward).
 *    Output: output size is the number of input sub-sequences
 *    output[i] = seqlastins/average/max_{for each instance in this
 *                sub-sequence}{input[i]}
 *
 * The config file api is pooling_layer (max/average) and
 * last_seq / first_seq (seqlastins).
 */
class SequencePoolLayer : public Layer {
protected:
  // Pooling granularity, chosen in init() from the configured trans_type.
  enum SequenceLevel { kNonSeq = 0, kSeq = 1 };

  // Holds a SequenceLevel value. Given a default so it is never read
  // uninitialized.
  int type_ = kNonSeq;
  // Bias weight; stays empty when no bias parameter is present.
  std::unique_ptr<Weight> biases_;
  // Number of output rows, refreshed by every forward() call.
  size_t newBatchSize_ = 0;
  // Start positions selected by forward(): sub-sequence starts when type_ is
  // kSeq, sequence starts otherwise.
  ICpuGpuVectorPtr startPositions_;

public:
  explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}

  virtual ~SequencePoolLayer() {}

  // Runs Layer::init, requires exactly one input, sets up biases_ and type_.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  // Selects and validates startPositions_, sets newBatchSize_, and resets the
  // output to newBatchSize_ rows.
  void forward(PassType passType);

  // Runs backwardActivation() and accumulates the bias gradient, if any.
  void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册