Commit 17994e38 authored by: X xuwei06

RecurrentGroup with mixed input sequence types

SubsequenceInput is no longer needed; the framework now detects the sequence type of each input automatically.
Parent 14c0e71d
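For reference, a minimal sketch of the new usage, assuming a flat group that mixes a sequence input with a non-sequence input (this sketch is not taken from the commit; the layer names are illustrative, and the full mixed example with a sub-sequence input is the new test config gserver/tests/sequence_rnn_mixed_inputs.py added below):

```python
from paddle.trainer_config_helpers import *

dict_dim, word_dim, hidden_dim, label_dim = 10, 2, 2, 2

words = data_layer(name="words", size=dict_dim)   # sequence input
label = data_layer(name="label", size=label_dim)  # non-sequence input

emb = embedding_layer(input=words, size=word_dim)
lbl_emb = embedding_layer(input=label, size=word_dim)


def step(word, lbl):
    # 'word' is one token of the sequence; 'lbl' is the per-sample
    # non-sequence input, provided at every time step.
    mem = memory(name="rnn_state", size=hidden_dim)
    return fc_layer(input=[word, lbl, mem],
                    size=hidden_dim,
                    act=TanhActivation(),
                    bias_attr=True,
                    name="rnn_state")


# Inputs of different sequence types are passed to recurrent_group directly;
# the framework detects the type of each in_link, so no SubsequenceInput
# wrapper is needed for sub-sequence inputs either.
out = recurrent_group(step=step, input=[emb, lbl_emb])
```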
......@@ -214,7 +214,6 @@ void RecurrentGradientMachine::init(
inFrameLines_[i].linkName = subModelConfig->in_links(i).link_name();
inFrameLines_[i].inLayer =
rootNetwork_->getLayer(subModelConfig->in_links(i).layer_name());
inFrameLines_[i].hasSubseq = subModelConfig->in_links(i).has_subseq();
}
outFrameLines_.resize(subModelConfig->out_links_size());
......@@ -241,11 +240,8 @@ void RecurrentGradientMachine::init(
rootNetwork_->getLayer(memoryConfig.boot_layer_name());
LayerConfig scatterConfig = *agentConfig;
memoryFrameLines_[i].is_sequence = memoryConfig.is_sequence();
memoryFrameLines_[i].rootAgent.reset(
memoryConfig.is_sequence()
? new SequenceScatterAgentLayer(scatterConfig)
: new ScatterAgentLayer(scatterConfig));
new ScatterAgentLayer(scatterConfig));
memoryFrameLines_[i].rootAgent->init(LayerMap(), parameterMap_);
memoryFrameLines_[i].bootLayer = memoryFrameLines_[i].rootAgent;
......@@ -267,9 +263,7 @@ void RecurrentGradientMachine::init(
if (subModelConfig->has_generator()) {
memoryFrameLines_[i].scatterAgents.resize(2);
for (auto& agent : memoryFrameLines_[i].scatterAgents) {
agent.reset(memoryConfig.is_sequence()
? new SequenceScatterAgentLayer(*agentConfig)
: new ScatterAgentLayer(*agentConfig));
agent.reset(new ScatterAgentLayer(*agentConfig));
agent->init(LayerMap(), parameterMap_);
}
}
......@@ -297,8 +291,6 @@ void RecurrentGradientMachine::init(
if (subModelConfig->evaluator_names_size() > 0) {
evaluator_.reset(frames_[0]->makeEvaluator());
}
targetInfoInlinkId_ = subModelConfig->target_inlinkid();
}
void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
......@@ -376,108 +368,102 @@ void RecurrentGradientMachine::prefetch(const std::vector<Argument>& inArgs) {
LOG(FATAL) << "should not use this function";
}
void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
std::vector<Argument>* outArgs,
PassType passType) {
if (inFrameLines_.empty() && passType == PASS_TEST) {
generateSequence();
return;
} // else forward..
const Argument& input = inFrameLines_[0].inLayer->getOutput();
CHECK(input.sequenceStartPositions);
int batchSize = input.getBatchSize();
size_t numSequences = input.getNumSequences();
const int* starts = input.sequenceStartPositions->getData(false);
bool hasSubseq = input.hasSubseq();
// In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
// same inframe info
bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1;
// Defaultly, share info with the first inlink
if (shareInlinkInfo) {
targetInfoInlinkId_ = 0;
}
// check hasSubseq in both config and input are the same
CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq);
CHECK_EQ(starts[numSequences], batchSize);
CHECK(input.sequenceStartPositions);
// check other inputs has same sequence length and start
for (size_t i = 1; i < inFrameLines_.size(); ++i) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
CHECK_EQ((size_t)input1.getNumSequences(), numSequences);
// check all inputs should have same hasSubseq flag
CHECK_EQ(input.hasSubseq(), inFrameLines_[0].hasSubseq);
// if shareInlinkInfo, checks:
// 1. all inlinks have same number of total tokens
// 2. all inlinks have same number of tokens for each sentence of each
// sample. If hasSubseq, one sample has multiple sentence, else, one
// sample is one sentence
if (shareInlinkInfo) {
CHECK_EQ(input1.getBatchSize(), batchSize);
CHECK(std::equal(starts,
starts + numSequences + 1,
input1.sequenceStartPositions->getData(false)));
void RecurrentGradientMachine::checkInputConsistency(
int inlinkId, const std::vector<Argument::SeqInfo>& seqInfo) {
if (commonSeqInfo_.empty()) {
commonSeqInfo_.resize(seqInfo.size());
for (size_t i = 0; i < seqInfo.size(); ++i) {
commonSeqInfo_[i].topLevelLength = seqInfo[i].topLevelLength;
commonSeqInfo_[i].seqId = seqInfo[i].seqId;
}
} else {
CHECK_EQ(commonSeqInfo_.size(), seqInfo.size())
<< " RecurrentGroup " << subModelName_ << " input " << inlinkId
<< " has mismatched number of sequences";
for (size_t i = 0; i < seqInfo.size(); ++i) {
CHECK_EQ(commonSeqInfo_[i].topLevelLength, seqInfo[i].topLevelLength)
<< " RecurrentGroup " << subModelName_ << " input " << inlinkId
<< " has mismatched sequence length";
CHECK_EQ(commonSeqInfo_[i].seqId, seqInfo[i].seqId)
<< " RecurrentGroup " << subModelName_ << " input " << inlinkId
<< " has mismatched sequence length";
}
}
}
if (hasSubseq) {
CHECK(input.subSequenceStartPositions);
size_t numSubSequences = input.getNumSubSequences();
const int* subStarts = input.subSequenceStartPositions->getData(false);
CHECK_EQ(subStarts[numSubSequences], batchSize);
// if hasSubseq, check other inputs has same sub-sequence and sub-start
for (size_t i = 1; i < inFrameLines_.size(); ++i) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences);
if (shareInlinkInfo) {
CHECK(std::equal(subStarts,
subStarts + numSubSequences + 1,
input1.subSequenceStartPositions->getData(false)));
}
void RecurrentGradientMachine::calcNumSequencesAtEachStep() {
int numSequences = commonSeqInfo_.size();
numSeqs_.resize(maxSequenceLength_);
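// Note: commonSeqInfo_ is expected to be sorted by decreasing topLevelLength
// (as produced by Argument::getSeqInfo), so after this loop numSeqs_[j]
// equals the number of sequences that are still active at time step j.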
for (int i = 0; i < numSequences; ++i) {
for (int j = 0; j < commonSeqInfo_[i].topLevelLength; ++j) {
numSeqs_[j] = i + 1;
}
}
}
void RecurrentGradientMachine::reorganizeInput(PassType passType) {
info_.clear();
info_.resize(inFrameLines_.size());
commonSeqInfo_.clear();
seqInfos_.clear();
seqInfos_.resize(inFrameLines_.size());
for (size_t i = 0; i < inFrameLines_.size(); i++) {
const Argument& input = inFrameLines_[i].inLayer->getOutput();
if (!input.hasSeq()) {
continue;
}
input.getSeqInfo(&seqInfos_[i]);
checkInputConsistency(i, seqInfos_[i]);
}
CHECK(!commonSeqInfo_.empty())
<< "At least one input needs to be sequence or subsequence";
maxSequenceLength_ = commonSeqInfo_[0].topLevelLength;
calcNumSequencesAtEachStep();
for (size_t i = 0; i < inFrameLines_.size(); ++i) {
const Argument& input = inFrameLines_[i].inLayer->getOutput();
if (!input.hasSeq()) {
seqInfos_[i] = commonSeqInfo_;
}
createInFrameInfo(i, input, passType);
}
{
AsyncGpuBlock asyncGpuBlock;
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
if (shareInlinkInfo) {
input.getSeqInfo(&seqInfos_[0]);
maxSequenceLength_ = seqInfos_[0][0].topLevelLength;
createInFrameInfo(0, input, passType);
} else {
for (size_t i = 0; i < inFrameLines_.size(); i++) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
input1.getSeqInfo(&seqInfos_[i]);
maxSequenceLength_ = seqInfos_[i][0].topLevelLength;
createInFrameInfo(i, input1, passType);
}
}
// inFrameLine select rows in real layer one time
for (size_t i = 0; i < inFrameLines_.size(); i++) {
int curInlinkId = shareInlinkInfo ? 0 : i;
selectRowsOneTime(inFrameLines_[i].inLayer,
info_[curInlinkId].allIds,
info_[i].allIds,
&(inFrameLines_[i].outArg),
passType);
}
}
resizeOrCreateFrames(maxSequenceLength_);
resizeBootFrame(numSequences);
}
void RecurrentGradientMachine::reorganizeOutput(PassType passType) {
calcSequenceStartPositions();
for (size_t i = 0; i < outFrameLines_.size(); ++i) {
Info info;
auto& outFrameLine = outFrameLines_[i];
ICpuGpuVectorPtr sequenceStartPositions;
ICpuGpuVectorPtr subSequenceStartPositions;
createOutFrameInfo(
outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
auto gatherAgent =
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
CHECK_NOTNULL(gatherAgent);
gatherAgent->copyIdAndSequenceInfo(sequenceStartPositions,
subSequenceStartPositions,
info.allIds,
info.idIndex);
}
}
void RecurrentGradientMachine::connectFrames(PassType passType) {
for (auto& memoryFrameLine : memoryFrameLines_) {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
......@@ -487,8 +473,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
memoryFrameLine.outArg,
memoryFrameLine.allIds,
/* idIndex */ 0,
memoryFrameLine.allIds->getSize());
if (memoryFrameLine.is_sequence) { // memoryConfig is sequence
memoryFrameLine.allIds->getSize(),
/* handleBackward */ true);
if (memoryFrameLine.sequenceStartPositions) {
int size = memoryFrameLine.sequenceStartPositions->getSize();
scatterAgent->setSequenceStartPositions(
memoryFrameLine.sequenceStartPositions,
......@@ -501,28 +488,26 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
for (auto& outFrameLine : outFrameLines_) {
auto gatherAgent =
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
CHECK_NOTNULL(gatherAgent);
gatherAgent->copyIdAndSequenceInfo(input,
info_[targetInfoInlinkId_].allIds,
info_[targetInfoInlinkId_].idIndex);
gatherAgent->clearRealLayers();
}
for (int i = 0; i < maxSequenceLength_; ++i) {
int idSize = 0;
// connect in_links
for (size_t j = 0; j < inFrameLines_.size(); ++j) {
Info& info = info_[shareInlinkInfo ? 0 : j];
Info& info = info_[j];
// idSize denotes the sum number of tokens in each length i
idSize = info.idIndex[i + 1] - info.idIndex[i];
int idIndex = info.idIndex.empty() ? 0 : info.idIndex[i];
int idSize = info.idIndex.empty() ? numSeqs_[i]
: info.idIndex[i + 1] - info.idIndex[i];
InFrameLine inFrameLine = inFrameLines_[j];
auto scatterAgent =
dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
inFrameLine.outArg,
info.allIds,
info.idIndex[i],
idSize);
if (hasSubseq) {
idIndex,
idSize,
i == 0);
if (info.sequenceStartPositions) {
// size: the length of subsequence
int size = info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i];
scatterAgent->setSequenceStartPositions(
......@@ -536,11 +521,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
gatherAgent->addRealLayer(outFrameLine.frames[i]);
}
// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
// And inlinks that !hasSubSeq must have same inlink length.
idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i];
for (auto& memoryFrameLine : memoryFrameLines_) {
NeuralNetwork::connect(
memoryFrameLine.agents[i],
......@@ -548,6 +528,28 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
numSeqs_[i] /*height of agent*/);
}
}
}
void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
std::vector<Argument>* outArgs,
PassType passType) {
/* inArgs and outArgs are not used.
The inputs are inFrameLines_[i].inLayer.
The outputs are outFramesLines_[i].agentLayer
*/
if (inFrameLines_.empty() && passType == PASS_TEST) {
generateSequence();
return;
} // else forward..
reorganizeInput(passType);
int numSequences = commonSeqInfo_.size();
resizeOrCreateFrames(maxSequenceLength_);
resizeBootFrame(numSequences);
connectFrames(passType);
REGISTER_TIMER_INFO("RecurrentFwTime", "RecurrentFwTime");
// forward
......@@ -558,16 +560,12 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
const std::vector<Argument> inArgs;
std::vector<Argument> outArgs;
frames_[i]->forward(inArgs, &outArgs, passType);
if (hasSubseq) {
for (auto& outFrameLine : outFrameLines_) {
CHECK(outFrameLine.frames[i]->getOutput().sequenceStartPositions)
<< "In hierachical RNN, all out links should be from sequences.";
}
}
}
if (evaluator_ && passType == PASS_TEST) {
this->eval(evaluator_.get());
}
reorganizeOutput(passType);
}
void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
......@@ -634,76 +632,228 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
this->beamSearchStatistics_ = nullptr;
}
}
namespace {
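// Convert, in place, a vector whose first n elements hold segment lengths
// (the last element is ignored on entry) into cumulative start offsets:
// on exit starts[i] is the start of segment i and starts.back() is the total.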
void lenToStarts(std::vector<int>& starts) {
int pos = 0;
starts.back() = 0;
for (auto& start : starts) {
int tmp = start;
start = pos;
pos += tmp;
}
starts.back() = pos;
}
}
void RecurrentGradientMachine::calcSequenceStartPositions() {
std::vector<int> starts(commonSeqInfo_.size() + 1);
for (auto& seqInfo : commonSeqInfo_) {
starts[seqInfo.seqId] = seqInfo.topLevelLength;
}
lenToStarts(starts);
ICpuGpuVector::resizeOrCreate(sequenceStartPositions_, starts.size(), false);
std::copy(starts.begin(),
starts.end(),
sequenceStartPositions_->getMutableData(false));
}
void RecurrentGradientMachine::checkOutputConsistency(
OutFrameLine& outFrameLine) {
bool hasSeq = outFrameLine.frames[0]->getOutput().hasSeq();
for (int i = 0; i < maxSequenceLength_; ++i) {
LayerPtr frame = outFrameLine.frames[i];
CHECK_EQ(hasSeq, frame->getOutput().hasSeq());
int numSequences = frame->getOutput().getNumSequences();
CHECK_EQ(numSeqs_[i], numSequences);
}
}
void RecurrentGradientMachine::createOutFrameInfo(
OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions) {
checkOutputConsistency(outFrameLine);
if (!outFrameLine.frames[0]->getOutput().hasSeq()) {
createOutFrameInfo_seq(
outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
} else {
createOutFrameInfo_subseq(
outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
}
}
void RecurrentGradientMachine::createOutFrameInfo_seq(
OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions) {
std::vector<int> allIds;
info.idIndex.resize(1, 0); // first idIndex = 0
const int* starts = sequenceStartPositions_->getData(false);
for (int i = 0; i < maxSequenceLength_; ++i) {
LayerPtr frame = outFrameLine.frames[i];
size_t numSequences = frame->getOutput().getNumSequences();
for (size_t j = 0; j < numSequences; ++j) {
int seqStart = starts[commonSeqInfo_[j].seqId];
int seqLength = commonSeqInfo_[j].topLevelLength;
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
}
info.idIndex.push_back(allIds.size());
}
sequenceStartPositions = sequenceStartPositions_;
copyScattedId(allIds, &info.allIds, allIds.size());
CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
}
void RecurrentGradientMachine::createOutFrameInfo_subseq(
OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions) {
size_t numSequences = commonSeqInfo_.size();
std::vector<int> allIds;
info.idIndex.resize(1, 0); // first idIndex = 0
const int* starts = sequenceStartPositions_->getData(false);
std::vector<int> subStarts(starts[numSequences] + 1);
for (int i = 0; i < maxSequenceLength_; ++i) {
LayerPtr frame = outFrameLine.frames[i];
size_t numSequences = frame->getOutput().getNumSequences();
const int* seqStarts =
frame->getOutput().sequenceStartPositions->getData(false);
for (size_t j = 0; j < numSequences; ++j) {
subStarts[starts[commonSeqInfo_[j].seqId] + i] =
seqStarts[j + 1] - seqStarts[j];
}
}
lenToStarts(subStarts);
for (int i = 0; i < maxSequenceLength_; ++i) {
LayerPtr frame = outFrameLine.frames[i];
size_t numSequences = frame->getOutput().getNumSequences();
for (size_t j = 0; j < numSequences; ++j) {
int pos = starts[commonSeqInfo_[j].seqId] + i;
int subSeqStart = subStarts[pos];
int subSeqEnd = subStarts[pos + 1];
for (int k = subSeqStart; k < subSeqEnd; ++k) {
allIds.push_back(k);
}
}
info.idIndex.push_back(allIds.size());
}
ICpuGpuVector::resizeOrCreate(
subSequenceStartPositions, subStarts.size(), false);
int* cpuSubSequenceStartPositions =
subSequenceStartPositions->getMutableData(false);
std::copy(subStarts.begin(), subStarts.end(), cpuSubSequenceStartPositions);
ICpuGpuVector::resizeOrCreate(
sequenceStartPositions, numSequences + 1, false);
int* cpuSequenceStartPositions =
sequenceStartPositions->getMutableData(false);
for (size_t i = 0; i <= numSequences; ++i) {
cpuSequenceStartPositions[i] = subStarts[starts[i]];
}
copyScattedId(allIds, &info.allIds, allIds.size());
CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
}
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
const Argument& input,
PassType passType) {
bool hasSubseq = input.hasSubseq();
// numSequences: # samples(sequences) in a batch
size_t numSequences = input.getNumSequences();
if (!input.hasSeq()) {
createInFrameInfo_nonseq(inlinkId, input, passType);
} else if (!input.hasSubseq()) {
createInFrameInfo_seq(inlinkId, input, passType);
} else {
createInFrameInfo_subseq(inlinkId, input, passType);
}
}
void RecurrentGradientMachine::createInFrameInfo_nonseq(int inlinkId,
const Argument& input,
PassType passType) {
std::vector<int> allIds;
auto& seqInfo = seqInfos_[inlinkId];
numSeqs_.clear();
Info* inlinkInfo = &info_[inlinkId];
inlinkInfo->idIndex.clear();
inlinkInfo->idIndex.push_back(0); // first idIndex = 0
for (size_t i = 0; i < seqInfo.size(); ++i) {
allIds.push_back(seqInfo[i].seqId);
}
// copy and check scatterId
copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
}
void RecurrentGradientMachine::createInFrameInfo_seq(int inlinkId,
const Argument& input,
PassType passType) {
std::vector<int> allIds;
auto& seqInfo = seqInfos_[inlinkId];
Info* inlinkInfo = &info_[inlinkId];
inlinkInfo->idIndex.resize(1, 0); // first idIndex = 0
for (int i = 0; i < maxSequenceLength_; ++i) {
for (int j = 0; j < numSeqs_[i]; ++j) {
int seqLength = seqInfo[j].topLevelLength;
int seqStart = seqInfo[j].seqStart;
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
}
inlinkInfo->idIndex.push_back(allIds.size());
}
// copy and check scatterId
copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
CHECK_EQ(inlinkInfo->idIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
}
void RecurrentGradientMachine::createInFrameInfo_subseq(int inlinkId,
const Argument& input,
PassType passType) {
std::vector<int> allIds;
auto& seqInfo = seqInfos_[inlinkId];
Info* inlinkInfo = &info_[inlinkId];
inlinkInfo->idIndex.resize(1, 0); // first idIndex = 0
std::vector<int> sequenceStartPositions;
const int* subSequenceStartPositions = nullptr;
if (hasSubseq) { // for sequenceScatterAgentLayer
subSequenceStartPositions = input.subSequenceStartPositions->getData(false);
inlinkInfo->seqStartPosIndex.clear();
inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
}
// maxSequenceLength_: max topLevelLength in allsamples
subSequenceStartPositions = input.subSequenceStartPositions->getData(false);
inlinkInfo->seqStartPosIndex.clear();
inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
for (int i = 0; i < maxSequenceLength_; ++i) {
if (hasSubseq) {
sequenceStartPositions.push_back(0); // first element = 0
}
int numSeqs = 0;
for (size_t j = 0; j < numSequences; ++j) {
int seqLength = seqInfo[j].topLevelLength;
if (i >= seqLength) {
break;
}
++numSeqs;
if (hasSubseq) {
int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
int subSeqEnd =
subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
for (int k = subSeqStart; k < subSeqEnd; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqEnd - subSeqStart);
} else {
int seqStart = seqInfo[j].seqStart;
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
sequenceStartPositions.push_back(0); // first element = 0
for (int j = 0; j < numSeqs_[i]; ++j) {
int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
int subSeqEnd = subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
for (int k = subSeqStart; k < subSeqEnd; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqEnd - subSeqStart);
}
inlinkInfo->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
if (hasSubseq) {
inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
}
}
if (hasSubseq) {
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(
sequenceStartPositions.size(),
static_cast<size_t>(maxSequenceLength_ + input.getNumSubSequences()));
CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(
sequenceStartPositions.size(),
static_cast<size_t>(maxSequenceLength_ + input.getNumSubSequences()));
CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
// copy and check scatterId
copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
......@@ -717,11 +867,11 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
const Argument& input = (*memoryFrameLine).rootLayer->getOutput();
size_t numSequences = input.getNumSequences();
std::vector<int> allIds;
bool seqFlag = (*memoryFrameLine).is_sequence;
bool seqFlag = input.hasSeq();
CHECK(!input.hasSubseq())
<< "Subsequence boot layer for memory is not supported";
if (seqFlag) { // for sequenceScatterAgentLayer
CHECK(input.sequenceStartPositions)
<< "boot layer must be a sequence when is_sequence = true";
std::vector<int> sequenceStartPositions;
sequenceStartPositions.push_back(0); // first element = 0
const int* starts = input.sequenceStartPositions->getData(false);
......@@ -804,8 +954,7 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
for (auto& memoryFrameLine : memoryFrameLines_) {
if (!memoryFrameLine.rootLayer) continue;
Argument& bootArg = memoryFrameLine.rootLayer->getOutput();
size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences()
: bootArg.getBatchSize();
size_t batchSize = bootArg.getNumSequences();
if (numSequences) {
CHECK_EQ(numSequences, batchSize);
} else {
......@@ -845,12 +994,7 @@ void RecurrentGradientMachine::generateSequence() {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
dynamic_cast<ScatterAgentLayer*>(memoryFrameLine.rootAgent.get());
bool seqFlag = memoryFrameLine.is_sequence;
scatterAgent->setRealLayer(memoryFrameLine.rootLayer, ids, seqFlag);
if (seqFlag) {
CHECK(memoryFrameLine.rootLayer->getOutput().sequenceStartPositions)
<< "boot layer must be a sequence when is_sequence = true";
}
scatterAgent->setRealLayer(memoryFrameLine.rootLayer, ids);
}
NeuralNetwork::connect(
memoryFrameLine.agents[0], memoryFrameLine.bootLayer, ids.size());
......@@ -858,6 +1002,7 @@ void RecurrentGradientMachine::generateSequence() {
// boot layer forward
AsyncGpuBlock asyncGpuBlock;
for (auto& memoryFrameLine : memoryFrameLines_) {
memoryFrameLine.bootLayer->forward(PASS_TEST);
}
......@@ -930,8 +1075,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
auto scatterAgent = dynamic_cast<ScatterAgentLayer*>(
memoryFrameLine.scatterAgents[machineCur].get());
scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
scatterIds,
memoryFrameLine.is_sequence);
scatterIds);
scatterAgent->forward(PASS_TEST);
NeuralNetwork::connect(memoryFrameLine.agents[machineCur],
memoryFrameLine.scatterAgents[machineCur]);
......@@ -1003,8 +1147,7 @@ void RecurrentGradientMachine::connectPrevFrame(int stepId,
auto scatterAgent = dynamic_cast<ScatterAgentLayer*>(
memoryFrameLine.scatterAgents[machineCur].get());
scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
isOutIds ? topIds_ : machineIds_,
memoryFrameLine.is_sequence);
isOutIds ? topIds_ : machineIds_);
scatterAgent->forward(PASS_TEST);
NeuralNetwork::connect(memoryFrameLine.agents[machineCur],
memoryFrameLine.scatterAgents[machineCur]);
......
......@@ -284,6 +284,16 @@ public:
}
protected:
std::vector<Argument::SeqInfo> commonSeqInfo_;
ICpuGpuVectorPtr sequenceStartPositions_;
void calcSequenceStartPositions();
void checkInputConsistency(int inlinkId,
const std::vector<Argument::SeqInfo>& seqInfo);
void reorganizeInput(PassType passType);
void reorganizeOutput(PassType passType);
void connectFrames(PassType passType);
void calcNumSequencesAtEachStep();
void resizeOrCreateFrames(int numFrames);
void resizeBootFrame(int numSequences);
......@@ -295,8 +305,7 @@ protected:
std::string linkName;
LayerPtr inLayer;
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
bool hasSubseq;
Argument outArg; // scatter output argument
Argument outArg; // scatter output argument
};
std::vector<InFrameLine> inFrameLines_;
......@@ -318,7 +327,6 @@ protected:
std::vector<LayerPtr> agents;
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
Argument outArg; // scatter output argument
bool is_sequence;
// Different memoryFrameLine have different element as follows
IVectorPtr allIds; // scattered id of realLayer
ICpuGpuVectorPtr
......@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
// The original positions in the original batch
IVectorPtr allIds; // scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total number of ids for the first i steps
std::vector<int> idIndex;
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
std::vector<Info> info_;
std::vector<Info> info_; // for input
// numSeqs_[i] is the number of sequences that are longer than i (for sequence
// data) or have more than i subsequences (for subsequence data).
// Equivalently, numSeqs_[i] is the number of sequences active at step i;
std::vector<int> numSeqs_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
void checkOutputConsistency(OutFrameLine& outFrameLine);
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
......@@ -354,6 +367,28 @@ protected:
void createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_nonseq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_seq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_subseq(int inlinks_id,
const Argument& input,
PassType passType);
void createOutFrameInfo(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -386,9 +421,7 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
int maxSequenceLength_; // Max top-level length
bool useGpu_;
bool stopBeamSearch_;
......
......@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realHeight = realOutput.getBatchSize();
CHECK_LE(numSamples_, realHeight);
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) {
if (realOutput.ids) {
output_.ids =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
if (realOutput.hasSeq()) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
......@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
}
}
void SequenceAgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_ = realOutput;
}
}
REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
bool GatherAgentLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CHECK_EQ(config_.inputs_size(), 0);
......@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return true;
}
void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
realLayers_.clear();
void GatherAgentLayer::copyIdAndSequenceInfo(
ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = sequenceStartPositions;
output_.subSequenceStartPositions = subSequenceStartPositions;
allIds_ = ids;
idIndex_ = idIndex;
}
void GatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
forwardIds(passType);
forwardValue(passType);
}
void GatherAgentLayer::forwardValue(PassType passType) {
MatrixPtr valueReal = realLayers_[0]->getOutputValue();
if (!valueReal) return;
int height = allIds_->getSize();
int width = this->getSize();
......@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
if (realOutArg_.hasSeq()) {
forwardSequence(passType);
} else if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom(
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
} else { // used in generation
......@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if (realGrad) {
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
if (idIndex_ == 0) {
if (handleBackward_) {
outputGrad->addToRows(*realGrad, *ids_);
}
}
......@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void SequenceGatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
void GatherAgentLayer::forwardIds(PassType passType) {
int height = 0;
int* starts = output_.subSequenceStartPositions->getMutableData(false);
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (idReal) {
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
......@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
// Gather output.value, same as GatherAgentLayer
CHECK(output_.subSequenceStartPositions);
GatherAgentLayer::forward(passType);
LOG(FATAL) << "Not implemented";
}
}
void SequenceScatterAgentLayer::forward(PassType passType) {
void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
......@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
// Putting the generation logic here is really an ugly hack!
// used in generation
int height = 0;
size_t numSequences = ids_->getSize();
......@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
} // namespace paddle
......@@ -49,18 +49,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class SequenceAgentLayer : public AgentLayer {
public:
explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
~SequenceAgentLayer() {}
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* Like AgentLayer, but it can gather many real layers. Each real
* layer gives a few rows of a sequence; after gathering all real layers,
......@@ -83,7 +71,10 @@ public:
const ParameterMap& parameterMap) override;
// call before addRealLayer
void copyIdAndSequenceInfo(const Argument& input,
void clearRealLayers() { realLayers_.clear(); }
void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& allIds,
const std::vector<int>& idIndex);
......@@ -92,24 +83,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class SequenceGatherAgentLayer : public GatherAgentLayer {
public:
explicit SequenceGatherAgentLayer(const LayerConfig& config)
: GatherAgentLayer(config) {}
virtual ~SequenceGatherAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
// same as GatherAgentLayer
GatherAgentLayer::backward(callback);
}
void forwardValue(PassType passType);
void forwardIds(PassType passType);
};
/**
......@@ -129,6 +104,11 @@ protected:
int idSize_;
int seqStartPosIndex_;
int numSequences_; // number of sequences in this scatterAgentLayer
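// If true, this agent propagates the gradient back to the real layer
// (addToRows in backward()); typically only the first scatter agent that
// shares a real layer sets this.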
bool handleBackward_;
// used to store the expanded cpuStartPositions or subSequenceStartPositions
// of the real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
......@@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
*/
void setRealLayer(LayerPtr layer,
const std::vector<int>& ids,
bool copyId = false) {
void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
realLayer_ = layer;
IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
ids_->copyFrom(ids.data(), ids.size());
if (copyId) {
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
}
......@@ -169,12 +145,14 @@ public:
const Argument& outArg,
const IVectorPtr& ids,
int idIndex,
int idSize) {
int idSize,
bool handleBackward) {
realLayer_ = layer;
realOutArg_ = outArg;
ids_ = ids;
idIndex_ = idIndex;
idSize_ = idSize;
handleBackward_ = handleBackward;
}
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
......@@ -187,28 +165,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
: ScatterAgentLayer(config) {}
virtual ~SequenceScatterAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
ScatterAgentLayer::backward(callback);
}
void forwardSequence(PassType passType);
};
} // namespace paddle
......@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& input = getInput(0);
CHECK(input.hasSeq() || input.hasSubseq())
<< "Input should be a sequence or subsequence for layer " << getName();
newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
size_t dim = getSize();
// check
......
......@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1 = reduce(lambda x, y: x + y, d[0])
words2 = reduce(lambda x, y: x + y, d[1])
yield words1, words2, d[2]
###########################################################
data3 = [
[[[1, 2], [4, 5, 2]], [1, 2], 0],
[[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
]
# Used for sequence_nest_mixed_inputs.conf
@provider(
input_types=[
integer_value_sub_sequence(10), integer_value_sequence(10),
integer_value(2)
],
should_shuffle=False)
def process_mixed(settings, file_name):
for d in data3:
yield d
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq2')
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
......@@ -57,7 +57,7 @@ def outer_step(wid, x):
last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it, and will report error: In hierachical RNN, all out
# does not handle it, and will report error: In hierachical RNN, all out
# links should be from sequences now.
return inner_rnn_output
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(subseq, seq, nonseq):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner', input=[subseq, seq, nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[
subseq, expand_layer(
seq, expand_as=subseq,
expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer(
nonseq,
expand_as=subseq,
expand_level=ExpandLevel.FROM_NO_SEQUENCE),
StaticInput(encoding)
])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(data1, data2, label):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
print_layer(input=[data1, seq, label, inner_mem])
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner', input=[subseq, seq, nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[data1, data2, label, StaticInput(encoding)])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq2')
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
......
......@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
}
}
TEST(RecurrentGradientMachine, rnn_mixed_input) {
for (bool useGpu : {false, true}) {
test("gserver/tests/sequence_rnn_mixed_inputs.py",
"gserver/tests/sequence_rnn_matched_inputs.py",
1e-6,
useGpu);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
......
......@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \
setSync(useGpu); \
if (useGpu) { \
copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \
} else { \
copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \
} \
} while (0)
......@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
case DATA_AT_GPU:
CHECK(gpuVectorT_);
this->resizeOrCreate(gpuVectorT_->getSize(), false);
cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT);
cpuVectorT_->copyFrom(*gpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_CPU:
......@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
case DATA_AT_CPU:
CHECK(cpuVectorT_);
this->resizeOrCreate(cpuVectorT_->getSize(), true);
gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT);
gpuVectorT_->copyFrom(*cpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_GPU:
......
......@@ -149,6 +149,7 @@ struct Argument {
: getBatchSize();
}
bool hasSeq() const { return sequenceStartPositions != nullptr; }
bool hasSubseq() const { return subSequenceStartPositions != nullptr; }
const int* getCpuStartPositions() const {
......
......@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool beam_search) {
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
for (auto useGpu : useGpuConfs) {
LOG(INFO) << configFile << " useGpu=" << useGpu
<< " beam_search=" << beam_search;
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
}
};
......
......@@ -333,48 +333,32 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
config_assert(
in_links_has_subseq == has_subseq,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name]
if has_subseq:
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
pair.has_subseq = has_subseq
@config_func
def RecurrentLayerGroupSetOutLink(link):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
layer_name = MakeLayerNameInParentSubmodel(name)
pair = g_current_submodel.out_links.add()
pair.layer_name = MakeLayerNameInSubmodel(name)
pair.link_name = layer_name
pair.has_subseq = has_subseq
def RecurrentLayerGroupSetGenerator(generator=None):
......@@ -425,8 +409,6 @@ def RecurrentLayerGroupEnd(name):
agent_name = GetLayerBaseName(pair.link_name)
if prev_submodel.HasField("generator"):
DataLayer(name=agent_name, size=layer.size)
elif pair.has_subseq:
SequenceGatherAgentLayer(name=agent_name, size=layer.size)
else:
GatherAgentLayer(name=agent_name, size=layer.size)
......@@ -2253,13 +2235,6 @@ class AgentLayer(LayerBase):
name, 'agent', size, inputs=[], device=device)
@config_layer('sequence_agent')
class SequenceAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceAgentLayer, self).__init__(
name, 'sequence_agent', size, inputs=[], device=device)
@config_layer('gather_agent')
class GatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
......@@ -2274,20 +2249,6 @@ class ScatterAgentLayer(LayerBase):
name, 'scatter_agent', size, inputs=[], device=device)
@config_layer('sequence_gather_agent')
class SequenceGatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceGatherAgentLayer, self).__init__(
name, 'sequence_gather_agent', size, inputs=[], device=device)
@config_layer('sequence_scatter_agent')
class SequenceScatterAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceScatterAgentLayer, self).__init__(
name, 'sequence_scatter_agent', size, inputs=[], device=device)
@config_layer('multiplex')
class MultiplexLayer(LayerBase):
def __init__(self, name, inputs, size, device=None):
......@@ -2303,12 +2264,12 @@ class MultiplexLayer(LayerBase):
@config_func
def Link(
name,
has_subseq=False, ):
def Link(name, has_subseq=False):
"""
Still keeping has_subseq for backward compatibility
"""
link_config = LinkConfig()
link_config.link_name = name
link_config.has_subseq = has_subseq
return link_config
......@@ -2341,13 +2302,7 @@ def Memory(name,
config_assert(name is not None, "name cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence:
config_assert(
boot_layer is not None,
"there must be boot_layer in network when is_sequence = True")
agent_layer = SequenceAgentLayer(agent_name, size)
else:
agent_layer = AgentLayer(agent_name, size)
agent_layer = AgentLayer(agent_name, size)
config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add()
......
......@@ -3329,8 +3329,9 @@ class StaticInput(object):
input.size = size
class SubsequenceInput(object):
def SubsequenceInput(input):
"""
DEPRECATED.
Input sequence has sub-sequence, used in recurrent_group.
The example usage is:
......@@ -3339,11 +3340,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer)
"""
def __init__(self, input):
assert isinstance(input, LayerOutput)
assert input.size is not None
self.input = input
return input
@wrap_name_default("recurrent_group")
......@@ -3407,7 +3404,8 @@ def recurrent_group(step,
input sequence in a reverse order.
:type reverse: bool
:param targetInlink: the input layer which share info with layer group's output
:param targetInlink: DEPRECATED.
The input layer which shares info with layer group's output
Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input
......@@ -3429,46 +3427,21 @@ def recurrent_group(step,
model_type('recurrent_nn')
def is_single_input(x):
return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \
or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
if is_single_input(input):
input = [input]
assert isinstance(input, collections.Sequence)
def is_in_links(x):
return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput)
in_links = filter(is_in_links, input)
def targetInlink_in_inlinks():
for inlink in in_links:
if isinstance(inlink, SubsequenceInput):
if targetInlink == inlink.input:
return True
elif targetInlink == inlink:
return True
return False
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
def map_in_links(x):
if isinstance(x, SubsequenceInput):
contains_sub_seq[0] = True
return Link(name=x.input.name, has_subseq=True)
else:
return x.name
RecurrentLayerGroupWithoutOutLinksBegin(
name=name,
in_links=map(map_in_links, in_links),
seq_reversed=reverse,
target_inlinkname=targetInlinkName)
in_links=map(lambda x: x.name, in_links),
seq_reversed=reverse)
in_args = []
has_LayerOutput = False
for each_input in input:
......@@ -3476,10 +3449,7 @@ def recurrent_group(step,
if isinstance(each_input, LayerOutput):
in_args.append(each_input)
has_LayerOutput = True
elif isinstance(each_input, SubsequenceInput):
in_args.append(each_input.input)
has_LayerOutput = True
else:
else: # StaticInput
mem_name = "__%s_memory__" % each_input.input.name
mem = memory(
name=mem_name,
......@@ -3503,10 +3473,7 @@ def recurrent_group(step,
for ot in layer_outs:
assert isinstance(ot, LayerOutput)
ot.reverse = reverse
if contains_sub_seq[0]:
RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
else:
RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupEnd(name=name)
......@@ -5608,13 +5575,13 @@ def row_conv_layer(input,
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different from the 1D sequence
convolution. Assume the future context length is k; that is, the output at
time step t is computed from the input features from the t-th
time step to the (t+k)-th time step. Assume the hidden dimension of the input
activations is d; the activations r_t of the new layer at time step t are:
.. math::
r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
......
......@@ -261,12 +261,10 @@ sub_models {
in_links {
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -285,12 +283,10 @@ sub_models {
in_links {
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
has_subseq: false
}
target_inlinkid: -1
}
......
......@@ -351,12 +351,10 @@ sub_models {
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -383,12 +381,10 @@ sub_models {
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__"
has_subseq: false
}
target_inlinkid: -1
}
......
......@@ -155,7 +155,7 @@ layers {
}
layers {
name: "sub_seq_input@__recurrent_group_2__"
type: "sequence_scatter_agent"
type: "scatter_agent"
size: 100
active_type: ""
}
......@@ -182,7 +182,7 @@ layers {
}
layers {
name: "rnn_subseq_forward"
type: "sequence_gather_agent"
type: "gather_agent"
size: 200
active_type: ""
}
......@@ -623,12 +623,10 @@ sub_models {
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
}
out_links {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -647,12 +645,10 @@ sub_models {
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
}
out_links {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -671,12 +667,10 @@ sub_models {
in_links {
layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
}
out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward"
has_subseq: true
}
target_inlinkid: -1
}
......@@ -703,12 +697,10 @@ sub_models {
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -727,12 +719,10 @@ sub_models {
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -751,12 +741,10 @@ sub_models {
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
......