diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index a3d04b207c0d84ea9c77c646314efedcd49772b1..cc0eda9f13b13fcc715ac0bf337eaa557d782faa 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -190,7 +190,7 @@ public:
     std::vector<int> ids;
 
     /**
-     * @brief idsProb, log probability of each generated words.
+     * @brief idsProb, log probability of each generated word.
      */
     std::vector<real> idsProb;
 
@@ -472,16 +472,42 @@ private:
   void copyDataOutlinkFrame(size_t machineCur);
 
   /*
-   * @brief In generation, if the layer group has more than 1 outlink, outlinks
-   * except the first one are data outlinks. This function creates the data
-   * outlinks.
-   * @note In beam search, only one generated sequence with the hightest log
-   * probabilites are retained.
+   * @brief In generation, if the layer group has more than 1 outlink, every
+   * outlink except the first one is a data outlink. In RecurrentLayerGroup,
+   * each time step is a separate Network, so the outputs of a layer inside
+   * the RecurrentLayerGroup are stored in separate Arguments. If one layer
+   * is specified as an outlink of the RecurrentLayerGroup, this function
+   * collects its outputs at every time step of every generated sequence,
+   * which are dispersed in separate Arguments, to form a new single
+   * Argument as the output of the RecurrentLayerGroup.
    */
   void createDataOutlink();
+
+  /*
+   * @brief Decide how many rows to select, from a start position, out of
+   * the Matrix that stores the forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not.
+   * @param outArgs: all the returned Arguments of the forward pass during
+   * the generation process.
+   * @param copySize: the returned result, the number of rows to select from
+   * the Matrix that stores the forward-pass results, from a start position.
+   */
   void createDataOutlinkCopySizeInfo(bool isSeq,
                                      std::vector<Argument>& outArgs,
                                      std::vector<int>& copySize);
+
+  /*
+   * @brief Decide the index of the start row, for each time step of a
+   * generated sequence, in the Matrix that stores the entire beam search
+   * batch's forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not.
+   * @param outArgs: all the returned Arguments of the forward pass during
+   * the generation process.
+   */
   void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
 
   /*
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index f45a51d7b1836f02945f5efa3b061abc5fe7f974..9a9092af9b03e31277bad9a0db1611ab1f158d2f 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -352,8 +352,8 @@ void Argument::concat(const std::vector<Argument>& args,
   CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
   for (int j = startPos; j < endPos; ++j) {
     const Argument& arg = args[j - startPos];
-    CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the "
-                                 << "same dataId";
+    CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
+                                 << "the same dataId.";
     const int srcStartRow = selectRows[j];
     copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
     copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index 30c826ffc8e7e5ef384520e6c95cd69584df971d..810bea913ec79b2df0eb63ed5a4fd411549ff2e9 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -1375,9 +1375,9 @@ def simple_attention(encoded_sequence,
         weight=attention_weight,
         input=encoded_sequence,
         name='%s_scaling' % name)
+
     return pooling_layer(
-        input=scaled, pooling_type=SumPooling(),
-        name="%s_pooling" % name), attention_weight
+        input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
 
 
 def inputs(layers, *args):
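The two new helpers are easier to follow with a toy version of the row bookkeeping they document. The sketch below is not Paddle code: gather_outlink, step_outputs, and seq_to_row are hypothetical names, and plain numpy arrays stand in for Matrix and Argument. It only illustrates the idea that per-time-step rows, located by a start index and a copy size, are stitched into one output.

import numpy as np

def gather_outlink(step_outputs, seq_to_row):
    # step_outputs: one 2-D array per time step; each row belongs to one
    # live sequence of the beam search batch at that step.
    # seq_to_row: one (start_row, copy_size) pair per time step, the toy
    # counterpart of the selected-rows and copy-size info.
    pieces = []
    for (start, size), mat in zip(seq_to_row, step_outputs):
        pieces.append(mat[start:start + size])  # rows of this step to keep
    return np.concatenate(pieces, axis=0)

# Toy batch: 3 time steps, 2 live sequences per step, feature size 4.
steps = [np.arange(8, dtype=float).reshape(2, 4) + 10 * t for t in range(3)]
# Follow sequence 0: one row (copy_size == 1) starting at row 0 of each step;
# copy_size > 1 would correspond to isSeq, i.e. a sub-sequence per step.
out = gather_outlink(steps, [(0, 1), (0, 1), (0, 1)])
print(out.shape)  # (3, 4): sequence 0's outputs stitched in time order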
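The networks.py hunk also changes the public return value of simple_attention: it returns only the pooled context vector again, instead of a (context, attention_weight) tuple, so call sites that unpack two values must be updated. A sketch of the adjusted call, where encoded, proj, and state are placeholder variables:

# After this patch, simple_attention returns a single layer:
context = simple_attention(
    encoded_sequence=encoded,   # placeholder: the encoder's output sequence
    encoded_proj=proj,          # placeholder: its projection
    decoder_state=state)        # placeholder: the decoder's hidden state
# Call sites written against the old two-value form, i.e.
#   context, attention_weight = simple_attention(...)
# would now fail to unpack and should drop the second target.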