提交 023fe1e3 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #3208 from luotao1/save_log_prob

Save log prob
......@@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() {
size_t numSequences = getGenBatchSize();
resizeBootFrame(numSequences);
// We create only two sub-network in generation for alternate use.
// Thus, we can reduce total memory of output_ in layer forward.
// We create only two sub-networks in generation: one stores the states of all
// layers at the previous time step, and the other stores the states at the
// current time step.
resizeOrCreateFrames(2);
// outFrameLines_.size() > 1UL
......@@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() {
// init outArg
size_t resultNum = generator_.config.num_results_per_sample();
IVector::resizeOrCreate(
generator_.outArg.ids,
generator_.config.max_num_frames() * numSequences * resultNum,
false);
size_t maxGenWordCount =
generator_.config.max_num_frames() * numSequences * resultNum;
IVector::resizeOrCreate(generator_.outArg.ids, maxGenWordCount, false);
if (resultNum > 1) {
CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
Matrix::resizeOrCreate(generator_.outArg.in,
......@@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum,
false,
/* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
}
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1,
......@@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
starts[0] = 0;
if (numResults > 1) {
real* probs = generator_.outArg.in->getData();
real* idsProb = generator_.outArg.value->getData();
size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
Path& path = finalPaths_[i][j];
generator_.ids.push_back(path.ids.size()); // sequence size
size_t genLen = path.ids.size();
generator_.ids.push_back(genLen); // sequence size
generator_.ids.insert(
generator_.ids.end(), path.ids.begin(), path.ids.end());
generator_.ids.push_back(-1); // end of sequence
memcpy(idsProb + curPos, path.idsProb.data(), sizeof(real) * genLen);
curPos += genLen;
idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb;
if (!j && dataArgsSize_) {
......
......@@ -189,6 +189,11 @@ public:
*/
std::vector<int> ids;
/**
* @brief idsProb, log probability of each generated word.
*/
std::vector<real> idsProb;
/**
* @brief logProb, current probability of path.
*/
......@@ -228,11 +233,13 @@ public:
*/
Path(Path& old, int newId, real logProb, int machineId, int topIndex)
: ids(old.ids),
idsProb(old.idsProb),
logProb(old.logProb + logProb),
machineId(machineId),
topIndex(topIndex),
seqId(old.seqId) {
ids.push_back(newId);
idsProb.push_back(logProb);
if (!old.probHistory.empty()) {
this->probHistory = old.probHistory;
// probHistory store current prob, not sum
......@@ -411,8 +418,9 @@ protected:
/**
 * @brief State used while generating sequences.
 *
 * Each member is declared exactly once; declaring `ids` or `outArg` twice
 * in the same struct is a redeclaration error.
 */
struct Generator {
  GeneratorConfig config;
  std::vector<int> ids;       // store generated sequences
  std::vector<real> idsProb;  // log probability of each generated word
  Argument outArg;            // final output argument
};
bool generating_;
Generator generator_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册