提交 699d5f26 编写于 作者: Z zhangruiqing01 提交者: Yu Yang

modify RecurrentGradientMachine to support unequal length inputs

* modify RecurrentGradientMachine to support hasSubSeq sequence inlinks that have the same number of sentences but a different number of tokens in each sentence

Change-Id: Ic71f00a4bb346b4fa93e650dfb4b1a0d8d2338b0
上级 0f91ea7e
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "GradientMachine.h" #include "GradientMachine.h"
...@@ -101,7 +100,7 @@ public: ...@@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped. * Return true if this prefix or candidate is expected to be dropped.
*/ */
typedef std::function<bool(int seqId, const std::vector<int>&, typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback; const std::vector<real>&)> DropCallback;
/** /**
* @brief NormOrDropNodeCallback * @brief NormOrDropNodeCallback
...@@ -117,7 +116,7 @@ public: ...@@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path. * The fourth parameter is the probability of the whole path.
*/ */
typedef std::function<void(int seqId, const std::vector<int>&, typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback; std::vector<real>&, real*)> NormOrDropNodeCallback;
/** /**
* @brief Register beam search control callbacks. Used for prediction. * @brief Register beam search control callbacks. Used for prediction.
...@@ -192,7 +191,7 @@ public: ...@@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec; std::vector<int> machineIdVec;
/** /**
...@@ -206,7 +205,10 @@ public: ...@@ -206,7 +205,10 @@ public:
/** /**
* @brief Path default ctor, first logProb is 0. * @brief Path default ctor, first logProb is 0.
*/ */
Path() { logProb = 0; seqId = 0; } Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; } explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/** /**
...@@ -319,21 +321,33 @@ protected: ...@@ -319,21 +321,33 @@ protected:
}; };
std::vector<MemoryFrameLine> memoryFrameLines_; std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows. // Each inFrameLines(inlinks) has its own info(elements) below,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info { struct Info {
IVectorPtr allIds; // scattered id of realLayer IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
}; };
Info info_; std::vector<Info> info_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex) // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex) // its sequence info:
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_; // if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
void createInFrameInfo(const Argument& input, PassType passType); // the id of inlink which share info with outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines at one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines at one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType); PassType passType);
...@@ -363,6 +377,9 @@ protected: ...@@ -363,6 +377,9 @@ protected:
NeuralNetwork* rootNetwork_; NeuralNetwork* rootNetwork_;
bool reversed_; bool reversed_;
// if hasSubseq: max number of sentences (subseq) in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_; int maxSequenceLength_;
bool useGpu_; bool useGpu_;
bool stopBeamSearch_; bool stopBeamSearch_;
...@@ -415,7 +432,7 @@ private: ...@@ -415,7 +432,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame * @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded. * that the generation process expanded.
*/ */
void createDataOutlink(std::vector<int> & machineIdVec); void createDataOutlink(std::vector<int>& machineIdVec);
/* /*
* @brief used in beam search, connect previous frame to form recurrent link * @brief used in beam search, connect previous frame to form recurrent link
......
...@@ -452,6 +452,9 @@ message SubModelConfig { ...@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10; repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11; optional GeneratorConfig generator = 11;
// the id of the inlink which shares info with outlinks, used in recurrent layer group
optional int32 target_inlinkid = 12;
} }
message ModelConfig { message ModelConfig {
......
...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None): ...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func @config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name, def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed=False): seq_reversed=False,
target_inlinkname=""):
global g_current_submodel global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn", config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn") "RecurrentLayerGroup should be used only in recurrent_nn")
...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name) SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0 in_links_count = 0
for link in in_links: for linkid, link in enumerate(in_links):
if isinstance(link, basestring): if isinstance(link, basestring):
name = link name = link
has_subseq = False has_subseq = False
else: else:
name = link.link_name name = link.link_name
has_subseq = link.has_subseq has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0: if in_links_count == 0:
in_links_has_subseq = has_subseq in_links_has_subseq = has_subseq
else: else:
...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size) SequenceScatterAgentLayer(name=name, size=layer.size)
else: else:
ScatterAgentLayer(name=name, size=layer.size) ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add() pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name) pair.link_name = MakeLayerNameInSubmodel(name)
...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name, ...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links, in_links,
out_links, out_links,
generator=None, generator=None,
target_inlinkname="",
seq_reversed=False): seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed) seq_reversed,
target_inlinkname)
for link in out_links: for link in out_links:
RecurrentLayerGroupSetOutLink(link) RecurrentLayerGroupSetOutLink(link)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册