提交 699d5f26 编写于 作者: Z zhangruiqing01 提交者: Yu Yang

modify RecurrentGradientMachine to support unequal length inputs

* modify RecurrentGradientMachine to support hasSubSeq sequence inlinks that have the same number of sentences but a different number of tokens in each sentence

Change-Id: Ic71f00a4bb346b4fa93e650dfb4b1a0d8d2338b0
上级 0f91ea7e
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "GradientMachine.h"
......@@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped.
*/
typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback;
const std::vector<real>&)> DropCallback;
/**
* @brief NormOrDropNodeCallback
......@@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path.
*/
typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback;
std::vector<real>&, real*)> NormOrDropNodeCallback;
/**
* @brief Register beam search control callbacks. Used for prediction.
......@@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation
int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec;
/**
......@@ -206,7 +205,10 @@ public:
/**
* @brief Path default ctor, first logProb is 0.
*/
Path() { logProb = 0; seqId = 0; }
Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/**
......@@ -319,21 +321,33 @@ protected:
};
std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows.
// Each inFrameLine (inlink) has its own info (the elements below),
// and all outFrameLines (outlinks) share the info of one inFrameLine,
// which is selected by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
Info info_;
std::vector<Info> info_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
// each inlink has a "std::vector<std::tuple<int, int, int, int>>" that denotes
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
void createInFrameInfo(const Argument& input, PassType passType);
// the id of the inlink which shares its info with the outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -363,6 +377,9 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences (subseq) in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
bool useGpu_;
bool stopBeamSearch_;
......@@ -415,7 +432,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink(std::vector<int> & machineIdVec);
void createDataOutlink(std::vector<int>& machineIdVec);
/*
* @brief used in beam search, connect previous frame to form recurrent link
......
......@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11;
// the id of the inlink which shares its info with the outlinks, used in recurrent layer group
optional int32 target_inlinkid = 12;
}
message ModelConfig {
......
......@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed=False):
seq_reversed=False,
target_inlinkname=""):
global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn")
......@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for link in in_links:
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
......@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
......@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links,
out_links,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed)
seq_reversed,
target_inlinkname)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册