提交 09f34c4b 编写于 作者: E emailweixu 提交者: GitHub

Merge pull request #2479 from emailweixu/mixed_input_rnn

RecurrentGroup with mixed input sequence types
......@@ -284,6 +284,16 @@ public:
}
protected:
std::vector<Argument::SeqInfo> commonSeqInfo_;
ICpuGpuVectorPtr sequenceStartPositions_;
void calcSequenceStartPositions();
void checkInputConsistency(int inlinkId,
const std::vector<Argument::SeqInfo>& seqInfo);
void reorganizeInput(PassType passType);
void reorganizeOutput(PassType passType);
void connectFrames(PassType passType);
void calcNumSequencesAtEachStep();
void resizeOrCreateFrames(int numFrames);
void resizeBootFrame(int numSequences);
......@@ -295,8 +305,7 @@ protected:
std::string linkName;
LayerPtr inLayer;
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
bool hasSubseq;
Argument outArg; // scatter output argument
Argument outArg; // scatter output argument
};
std::vector<InFrameLine> inFrameLines_;
......@@ -318,7 +327,6 @@ protected:
std::vector<LayerPtr> agents;
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
Argument outArg; // scatter output argument
bool is_sequence;
// Different memoryFrameLine have different element as follows
IVectorPtr allIds; // scattered id of realLayer
ICpuGpuVectorPtr
......@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
// The original positions in the original batch
IVectorPtr allIds; // scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std::vector<int> idIndex;
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
std::vector<Info> info_;
std::vector<Info> info_; // for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std::vector<int> numSeqs_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
void checkOutputConsistency(OutFrameLine& outFrameLine);
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
......@@ -354,6 +367,28 @@ protected:
void createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_nonseq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_seq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_subseq(int inlinks_id,
const Argument& input,
PassType passType);
void createOutFrameInfo(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -386,9 +421,7 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
int maxSequenceLength_; // Max top-level length
bool useGpu_;
bool stopBeamSearch_;
......
......@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realHeight = realOutput.getBatchSize();
CHECK_LE(numSamples_, realHeight);
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) {
if (realOutput.ids) {
output_.ids =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
if (realOutput.hasSeq()) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
......@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
}
}
void SequenceAgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_ = realOutput;
}
}
REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
bool GatherAgentLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CHECK_EQ(config_.inputs_size(), 0);
......@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return true;
}
void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
realLayers_.clear();
void GatherAgentLayer::copyIdAndSequenceInfo(
ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = sequenceStartPositions;
output_.subSequenceStartPositions = subSequenceStartPositions;
allIds_ = ids;
idIndex_ = idIndex;
}
void GatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
forwardIds(passType);
forwardValue(passType);
}
void GatherAgentLayer::forwardValue(PassType passType) {
MatrixPtr valueReal = realLayers_[0]->getOutputValue();
if (!valueReal) return;
int height = allIds_->getSize();
int width = this->getSize();
......@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
if (realOutArg_.hasSeq()) {
forwardSequence(passType);
} else if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom(
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
} else { // used in generation
......@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if (realGrad) {
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
if (idIndex_ == 0) {
if (handleBackward_) {
outputGrad->addToRows(*realGrad, *ids_);
}
}
......@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void SequenceGatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
void GatherAgentLayer::forwardIds(PassType passType) {
int height = 0;
int* starts = output_.subSequenceStartPositions->getMutableData(false);
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (idReal) {
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
......@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
// Gather output.value, same as GatherAgentLayer
CHECK(output_.subSequenceStartPositions);
GatherAgentLayer::forward(passType);
LOG(FATAL) << "Not implemented";
}
}
void SequenceScatterAgentLayer::forward(PassType passType) {
void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
......@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
// Putting the generation logic here is really an ugly hack!
// used in generation
int height = 0;
size_t numSequences = ids_->getSize();
......@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
} // namespace paddle
......@@ -49,18 +49,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class SequenceAgentLayer : public AgentLayer {
public:
explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
~SequenceAgentLayer() {}
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers,
......@@ -83,7 +71,10 @@ public:
const ParameterMap& parameterMap) override;
// call before addRealLayer
void copyIdAndSequenceInfo(const Argument& input,
void clearRealLayers() { realLayers_.clear(); }
void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& allIds,
const std::vector<int>& idIndex);
......@@ -92,24 +83,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class SequenceGatherAgentLayer : public GatherAgentLayer {
public:
explicit SequenceGatherAgentLayer(const LayerConfig& config)
: GatherAgentLayer(config) {}
virtual ~SequenceGatherAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
// same as GatherAgentLayer
GatherAgentLayer::backward(callback);
}
void forwardValue(PassType passType);
void forwardIds(PassType passType);
};
/**
......@@ -129,6 +104,11 @@ protected:
int idSize_;
int seqStartPosIndex_;
int numSequences_; // number of sequences in this scatterAgentLayer
bool handleBackward_;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
......@@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
*/
void setRealLayer(LayerPtr layer,
const std::vector<int>& ids,
bool copyId = false) {
void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
realLayer_ = layer;
IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
ids_->copyFrom(ids.data(), ids.size());
if (copyId) {
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
}
......@@ -169,12 +145,14 @@ public:
const Argument& outArg,
const IVectorPtr& ids,
int idIndex,
int idSize) {
int idSize,
bool handleBackward) {
realLayer_ = layer;
realOutArg_ = outArg;
ids_ = ids;
idIndex_ = idIndex;
idSize_ = idSize;
handleBackward_ = handleBackward;
}
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
......@@ -187,28 +165,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
: ScatterAgentLayer(config) {}
virtual ~SequenceScatterAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
ScatterAgentLayer::backward(callback);
}
void forwardSequence(PassType passType);
};
} // namespace paddle
......@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& input = getInput(0);
CHECK(input.hasSeq() || input.hasSubseq())
<< "Input should be a sequence or subsequence for layer " << getName();
newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
size_t dim = getSize();
// check
......
......@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1 = reduce(lambda x, y: x + y, d[0])
words2 = reduce(lambda x, y: x + y, d[1])
yield words1, words2, d[2]
###########################################################
data3 = [
[[[1, 2], [4, 5, 2]], [1, 2], 0],
[[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
]
# Used for sequence_nest_mixed_inputs.conf
@provider(
input_types=[
integer_value_sub_sequence(10), integer_value_sequence(10),
integer_value(2)
],
should_shuffle=False)
def process_mixed(settings, file_name):
for d in data3:
yield d
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq2')
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
......@@ -57,7 +57,7 @@ def outer_step(wid, x):
last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it, and will report error: In hierachical RNN, all out
# does not handle it, and will report error: In hierachical RNN, all out
# links should be from sequences now.
return inner_rnn_output
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(subseq, seq, nonseq):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner', input=[subseq, seq, nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[
subseq, expand_layer(
seq, expand_as=subseq,
expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer(
nonseq,
expand_as=subseq,
expand_level=ExpandLevel.FROM_NO_SEQUENCE),
StaticInput(encoding)
])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(data1, data2, label):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
print_layer(input=[data1, seq, label, inner_mem])
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner',
input=[subseq, StaticInput(seq), nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[data1, data2, StaticInput(label), StaticInput(encoding)])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq2')
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
......
......@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
}
}
TEST(RecurrentGradientMachine, rnn_mixed_input) {
for (bool useGpu : {false, true}) {
test("gserver/tests/sequence_rnn_mixed_inputs.py",
"gserver/tests/sequence_rnn_matched_inputs.py",
1e-6,
useGpu);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
......
......@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \
setSync(useGpu); \
if (useGpu) { \
copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \
} else { \
copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \
} \
} while (0)
......@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
case DATA_AT_GPU:
CHECK(gpuVectorT_);
this->resizeOrCreate(gpuVectorT_->getSize(), false);
cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT);
cpuVectorT_->copyFrom(*gpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_CPU:
......@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
case DATA_AT_CPU:
CHECK(cpuVectorT_);
this->resizeOrCreate(cpuVectorT_->getSize(), true);
gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT);
gpuVectorT_->copyFrom(*cpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_GPU:
......
......@@ -149,6 +149,7 @@ struct Argument {
: getBatchSize();
}
bool hasSeq() const { return sequenceStartPositions != nullptr; }
bool hasSubseq() const { return subSequenceStartPositions != nullptr; }
const int* getCpuStartPositions() const {
......
......@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool beam_search) {
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
for (auto useGpu : useGpuConfs) {
LOG(INFO) << configFile << " useGpu=" << useGpu
<< " beam_search=" << beam_search;
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
}
};
......
......@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
config_assert(
in_links_has_subseq == has_subseq,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name]
if has_subseq:
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
pair.has_subseq = has_subseq
@config_func
def RecurrentLayerGroupSetOutLink(link):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
layer_name = MakeLayerNameInParentSubmodel(name)
pair = g_current_submodel.out_links.add()
pair.layer_name = MakeLayerNameInSubmodel(name)
pair.link_name = layer_name
pair.has_subseq = has_subseq
def RecurrentLayerGroupSetGenerator(generator=None):
......@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed,
target_inlinkname)
RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
agent_name = GetLayerBaseName(pair.link_name)
if prev_submodel.HasField("generator"):
DataLayer(name=agent_name, size=layer.size)
elif pair.has_subseq:
SequenceGatherAgentLayer(name=agent_name, size=layer.size)
else:
GatherAgentLayer(name=agent_name, size=layer.size)
......@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
name, 'agent', size, inputs=[], device=device)
@config_layer('sequence_agent')
class SequenceAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceAgentLayer, self).__init__(
name, 'sequence_agent', size, inputs=[], device=device)
@config_layer('gather_agent')
class GatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
......@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
name, 'scatter_agent', size, inputs=[], device=device)
@config_layer('sequence_gather_agent')
class SequenceGatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceGatherAgentLayer, self).__init__(
name, 'sequence_gather_agent', size, inputs=[], device=device)
@config_layer('sequence_scatter_agent')
class SequenceScatterAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceScatterAgentLayer, self).__init__(
name, 'sequence_scatter_agent', size, inputs=[], device=device)
@config_layer('multiplex')
class MultiplexLayer(LayerBase):
def __init__(self, name, inputs, size, device=None):
......@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
@config_func
def Link(
name,
has_subseq=False, ):
def Link(name, has_subseq=False):
"""
Still keeping has_subseq for backward compatibility
"""
link_config = LinkConfig()
link_config.link_name = name
link_config.has_subseq = has_subseq
return link_config
......@@ -2341,20 +2297,13 @@ def Memory(name,
config_assert(name is not None, "name needs cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence:
config_assert(
boot_layer is not None,
"there must be boot_layer in network when is_sequence = True")
agent_layer = SequenceAgentLayer(agent_name, size)
else:
agent_layer = AgentLayer(agent_name, size)
agent_layer = AgentLayer(agent_name, size)
config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add()
if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias),
boot_with_const_id is not None))
config_assert(
......
......@@ -311,18 +311,6 @@ class LayerOutput(object):
self.outputs = outputs
self.reverse = reverse
def __repr__(self):
"""
Disable __repr__ for debug reason. Will be implemented when release
"""
assert False, "this method should not be invoked"
def __str__(self):
"""
Disable __str__ for debug reason. Will be implemented when release
"""
assert False, "this method should not be invoked"
def set_input(self, input):
"""
Set the input for a memory layer. Can only be used for memory layer
......@@ -2944,7 +2932,7 @@ def memory(name,
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: is sequence for boot_layer
:param is_seq: DEPRECATED. is sequence for boot_layer
:type is_seq: bool
:param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None
......@@ -2971,7 +2959,6 @@ def memory(name,
memory_name = Memory(
name,
size,
is_sequence=is_seq,
boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name,
......@@ -3318,19 +3305,21 @@ class StaticInput(object):
"""
StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
"""
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerOutput)
self.input = input
self.is_seq = is_seq
assert input.size is not None or size is not None
assert input.size is not None
if size is not None:
input.size = size
assert input.size == size
class SubsequenceInput(object):
def SubsequenceInput(input):
"""
DEPRECATED.
Input sequence has sub-sequence, used in recurrent_group.
The example usage is:
......@@ -3339,11 +3328,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer)
"""
def __init__(self, input):
assert isinstance(input, LayerOutput)
assert input.size is not None
self.input = input
return input
@wrap_name_default("recurrent_group")
......@@ -3407,7 +3392,8 @@ def recurrent_group(step,
input sequence in a reverse order.
:type reverse: bool
:param targetInlink: the input layer which share info with layer group's output
:param targetInlink: DEPRECATED.
The input layer which share info with layer group's output
Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input
......@@ -3429,46 +3415,21 @@ def recurrent_group(step,
model_type('recurrent_nn')
def is_single_input(x):
return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \
or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
if is_single_input(input):
input = [input]
assert isinstance(input, collections.Sequence)
def is_in_links(x):
return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput)
in_links = filter(is_in_links, input)
def targetInlink_in_inlinks():
for inlink in in_links:
if isinstance(inlink, SubsequenceInput):
if targetInlink == inlink.input:
return True
elif targetInlink == inlink:
return True
return False
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
def map_in_links(x):
if isinstance(x, SubsequenceInput):
contains_sub_seq[0] = True
return Link(name=x.input.name, has_subseq=True)
else:
return x.name
RecurrentLayerGroupWithoutOutLinksBegin(
name=name,
in_links=map(map_in_links, in_links),
seq_reversed=reverse,
target_inlinkname=targetInlinkName)
in_links=map(lambda x: x.name, in_links),
seq_reversed=reverse)
in_args = []
has_LayerOutput = False
for each_input in input:
......@@ -3476,21 +3437,13 @@ def recurrent_group(step,
if isinstance(each_input, LayerOutput):
in_args.append(each_input)
has_LayerOutput = True
elif isinstance(each_input, SubsequenceInput):
in_args.append(each_input.input)
has_LayerOutput = True
else:
else: # StaticInput
mem_name = "__%s_memory__" % each_input.input.name
mem = memory(
name=mem_name,
is_seq=each_input.is_seq,
name=None,
size=each_input.input.size,
boot_layer=each_input.input)
with mixed_layer(
name=mem_name,
size=each_input.input.size,
act=IdentityActivation()) as mix:
mix += identity_projection(mem)
mem.set_input(mem)
in_args.append(mem)
assert (is_generating != has_LayerOutput)
......@@ -3503,10 +3456,7 @@ def recurrent_group(step,
for ot in layer_outs:
assert isinstance(ot, LayerOutput)
ot.reverse = reverse
if contains_sub_seq[0]:
RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
else:
RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupEnd(name=name)
......@@ -5608,13 +5558,13 @@ def row_conv_layer(input,
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different form the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
......
......@@ -256,19 +256,15 @@ sub_models {
memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__simple_gru_1___recurrent_group"
......@@ -280,18 +276,14 @@ sub_models {
memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -341,24 +341,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_1___recurrent_group"
......@@ -373,23 +368,18 @@ sub_models {
memories {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -155,7 +155,7 @@ layers {
}
layers {
name: "sub_seq_input@__recurrent_group_2__"
type: "sequence_scatter_agent"
type: "scatter_agent"
size: 100
active_type: ""
}
......@@ -182,7 +182,7 @@ layers {
}
layers {
name: "rnn_subseq_forward"
type: "sequence_gather_agent"
type: "gather_agent"
size: 200
active_type: ""
}
......@@ -618,19 +618,15 @@ sub_models {
memories {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
}
out_links {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_1__"
......@@ -642,19 +638,15 @@ sub_models {
memories {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
}
out_links {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_2__"
......@@ -666,19 +658,15 @@ sub_models {
memories {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
is_sequence: false
}
in_links {
layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
}
out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward"
has_subseq: true
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_0___recurrent_group"
......@@ -693,24 +681,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__gru_group_0___recurrent_group"
......@@ -722,19 +705,15 @@ sub_models {
memories {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
......@@ -746,18 +725,14 @@ sub_models {
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
else:
extra_layers = []
layer_names = __get_used_layers__(output_layers + extra_layers)
layer_names = __get_used_layers__(list(output_layers) + list(extra_layers))
submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册