Commit 93006787 authored by emailweixu, committed by GitHub

Merge pull request #73 from reyoung/merge_icode

Merge Baidu Internal Changes.
......@@ -9,6 +9,7 @@ Install PaddlePaddle
:glob:
install_*
internal/install_from_jumbo.md
Build from Source
-----------------
......
......@@ -5,3 +5,4 @@ Cluster Train
:glob:
opensource/cluster_train.md
internal/index.md
......@@ -9,7 +9,11 @@ Note: The installation packages are still in pre-release state and your experienc
.. toctree::
:maxdepth: 1
:glob:
Download the source (internal) <../build/internal/download_paddle_source_zh_cn.rst>
Install with Jumbo (internal) <../build/internal/install_from_jumbo.rst>
Build from source (internal) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst
install/ubuntu_install.rst
cmake/index.rst
Cluster Training
================
* `Cluster Training <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
Cluster Training (internal) <internal/index.md>
......@@ -8,7 +8,7 @@ PaddlePaddle Documentation
* `User Interface <ui/index.html>`_
* `Examples <demo/index.html>`_
* `Model Configuration <../doc/ui/api/trainer_config_helpers/index.html>`_
* `Cluster Training <../doc/cluster/index.html>`_
* `Cluster Training <cluster/index.html>`_
Development Guide
-----------------
......
......@@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label;
output.resizeAndCopyFrom(arguments[0], false);
label.resizeAndCopyFrom(arguments[1], false);
output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions);
CHECK(label.ids);
......
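The evaluator change above switches to the explicit-stream form of resizeAndCopyFrom and synchronizes once after all copies are queued; the CTCLayer, HuberTwoClass and related layer changes further below follow the same pattern. A minimal sketch of the idiom, assuming a vector of GPU-side Arguments and illustrative variable names:

// Queue the device-to-host copies asynchronously on the default stream,
// then wait for all of them with a single synchronize.
std::vector<Argument> cpuInputs(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i) {
  // The three-argument overload does not synchronize internally.
  cpuInputs[i].resizeAndCopyFrom(arguments[i], /* useGpu= */ false,
                                 HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);  // all copies are now complete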
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Flags.h"
......@@ -291,6 +290,8 @@ void RecurrentGradientMachine::init(
if (subModelConfig->evaluator_names_size() > 0) {
evaluator_.reset(frames_[0]->makeEvaluator());
}
targetInfoInlinkId_ = subModelConfig->target_inlinkid();
}
void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
......@@ -325,7 +326,7 @@ void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
for (int i = frames_.size(); i < numFrames; ++i) {
std::unique_ptr<NeuralNetwork> frame(
NeuralNetwork::newNeuralNetwork(subModelName_));
NeuralNetwork::newNeuralNetwork(subModelName_));
frame->init(config_, subParamInitCb);
for (auto& inFrameLine : inFrameLines_) {
......@@ -382,6 +383,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
size_t numSequences = input.getNumSequences();
const int* starts = input.sequenceStartPositions->getData(false);
bool hasSubseq = input.hasSubseq();
// In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
// same inframe info
bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1;
// By default, share info with the first inlink
if (shareInlinkInfo) {
targetInfoInlinkId_ = 0;
}
// check that hasSubseq is the same in both config and input
CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq);
......@@ -394,9 +405,17 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
CHECK_EQ((size_t)input1.getNumSequences(), numSequences);
// check that all inputs have the same hasSubseq flag
CHECK_EQ(input.hasSubseq(), inFrameLines_[0].hasSubseq);
CHECK_EQ(input1.getBatchSize(), batchSize);
CHECK(std::equal(starts, starts + numSequences + 1,
input1.sequenceStartPositions->getData(false)));
// if shareInlinkInfo, check that:
// 1. all inlinks have the same total number of tokens
// 2. all inlinks have the same number of tokens for each sentence of each
// sample. If hasSubseq, one sample has multiple sentences; otherwise, one
// sample is one sentence
if (shareInlinkInfo) {
CHECK_EQ(input1.getBatchSize(), batchSize);
CHECK(std::equal(starts, starts + numSequences + 1,
input1.sequenceStartPositions->getData(false)));
}
}
if (hasSubseq) {
......@@ -408,19 +427,44 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
for (size_t i = 1; i < inFrameLines_.size(); ++i) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences);
CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
input1.subSequenceStartPositions->getData(false)));
if (shareInlinkInfo) {
CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
input1.subSequenceStartPositions->getData(false)));
}
}
}
seqLengthAndStart_.clear();
input.getSeqLengthAndStart(&seqLengthAndStart_, &maxSequenceLength_);
info_.clear();
info_.resize(inFrameLines_.size());
seqLengthAndStart_.resize(inFrameLines_.size());
{
AsyncGpuBlock asyncGpuBlock;
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
if (shareInlinkInfo) {
input.getSeqLengthAndStart(&seqLengthAndStart_[0], &maxSequenceLength_);
createInFrameInfo(0, input, passType);
} else {
for (size_t i = 0; i < inFrameLines_.size(); i++) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
input1.getSeqLengthAndStart(&seqLengthAndStart_[i],
&maxSequenceLength_);
createInFrameInfo(i, input1, passType);
}
}
// each inFrameLine selects rows in its real layer only once
for (size_t i = 0; i < inFrameLines_.size(); i++) {
int curInlinkId = shareInlinkInfo ? 0 : i;
selectRowsOneTime(inFrameLines_[i].inLayer, info_[curInlinkId].allIds,
&(inFrameLines_[i].outArg), passType);
}
}
resizeOrCreateFrames(maxSequenceLength_);
resizeBootFrame(numSequences);
AsyncGpuBlock asyncGpuBlock;
createInFrameInfo(input, passType);
for (auto& memoryFrameLine : memoryFrameLines_) {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
......@@ -443,23 +487,29 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
auto gatherAgent =
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
CHECK_NOTNULL(gatherAgent);
gatherAgent->copyIdAndSequenceInfo(input, info_.allIds, info_.idIndex);
gatherAgent->copyIdAndSequenceInfo(input, info_[targetInfoInlinkId_].allIds,
info_[targetInfoInlinkId_].idIndex);
}
for (int i = 0; i < maxSequenceLength_; ++i) {
int idSize = info_.idIndex[i + 1] - info_.idIndex[i];
int idSize = 0;
// connect in_links
for (auto& inFrameLine : inFrameLines_) {
for (size_t j = 0; j < inFrameLines_.size(); ++j) {
// idSize is the total number of tokens scattered at step i for inlink j
idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i];
InFrameLine inFrameLine = inFrameLines_[j];
auto scatterAgent =
dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
inFrameLine.outArg, info_.allIds,
info_.idIndex[i], idSize);
inFrameLine.outArg, info_[j].allIds,
info_[j].idIndex[i], idSize);
if (hasSubseq) {
int size = info_.seqStartPosIndex[i + 1] - info_.seqStartPosIndex[i];
scatterAgent->setSequenceStartPositions(
info_.sequenceStartPositions, info_.seqStartPosIndex[i], size);
// size: the number of scattered sequenceStartPositions entries for step i
int size =
info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i];
scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions,
info_[j].seqStartPosIndex[i],
size);
}
}
......@@ -469,13 +519,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
gatherAgent->addRealLayer(outFrameLine.frames[i]);
}
// connect memory links
// Use info_[0].idIndex because a sequence with has_subseq=True
// does not support a Memory whose boot layer is !hasSubseq,
// and all inlinks with !hasSubSeq must have the same length.
idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i];
for (auto& memoryFrameLine : memoryFrameLines_) {
NeuralNetwork::connect(
memoryFrameLine.agents[i],
i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
idSize /*height of agent*/);
numSeqs_[i] /*height of agent*/);
}
}
......@@ -560,62 +613,76 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
* If hasSubseq, it will also create scattered sequenceStartPositions information
* for all realLayers of inFrameLines one time.
*/
void RecurrentGradientMachine::createInFrameInfo(const Argument& input,
void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType) {
bool hasSubseq = input.hasSubseq();
// numSequences: # samples(sequences) in a batch
size_t numSequences = input.getNumSequences();
std::vector<int> allIds;
info_.idIndex.clear();
info_.idIndex.push_back(0); // first idIndex = 0
if (hasSubseq) { // for sequenceScatterAgentLayer
numSeqs_.clear();
Info* inlink_info = &info_[inlinks_id];
inlink_info->idIndex.clear();
inlink_info->idIndex.push_back(0); // first idIndex = 0
if (hasSubseq) { // for sequenceScatterAgentLayer
// numSubSequences: the total number of sentences in all samples of the batch
size_t numSubSequences = input.getNumSubSequences();
std::vector<int> sequenceStartPositions;
info_.seqStartPosIndex.clear();
info_.seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
inlink_info->seqStartPosIndex.clear();
inlink_info->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
// maxSequenceLength_: max number of sentences (subseq) in all samples
for (int i = 0; i < maxSequenceLength_; ++i) {
sequenceStartPositions.push_back(0); // first element = 0
for (size_t j = 0; j < numSubSequences; ++j) {
if (std::get<3>(seqLengthAndStart_[j]) == i) {
int subSeqStart = std::get<1>(seqLengthAndStart_[j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[j]);
sequenceStartPositions.push_back(0); // first element = 0
int numSeqs = 0;
for (size_t j = 0; j < numSubSequences; ++j) { // for each sentence
// seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
++numSeqs;
// subseqstart: the cpuSubSequenceStartPositions of this subseq
int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqLength);
subSeqLength);
}
}
info_.idIndex.push_back(allIds.size());
info_.seqStartPosIndex.push_back(sequenceStartPositions.size());
inlink_info->idIndex.push_back(allIds.size());
inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
numSeqs_.push_back(numSeqs);
}
// each inFrameLine creates sequenceStartPositions only once
CHECK_EQ(sequenceStartPositions.size(),
maxSequenceLength_ + numSubSequences);
CHECK_EQ(info_.seqStartPosIndex.size(),
CHECK_EQ(inlink_info->seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &info_.sequenceStartPositions);
createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
} else { // for scatterAgentLayer
for (int i = 0; i < maxSequenceLength_; ++i) {
int numSeqs = 0;
for (size_t j = 0; j < numSequences; ++j) {
int seqLength = std::get<0>(seqLengthAndStart_[j]);
int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
if (i >= seqLength) {
break;
}
int seqStart = std::get<1>(seqLengthAndStart_[j]);
++numSeqs;
int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
}
info_.idIndex.push_back(allIds.size());
inlink_info->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
}
}
// copy and check scatterId
copyScattedId(allIds, &info_.allIds, input.getBatchSize());
CHECK_EQ(info_.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
// inFrameLine select rows in real layer one time
for (auto& inFrameLine : inFrameLines_) {
selectRowsOneTime(inFrameLine.inLayer, info_.allIds, &inFrameLine.outArg,
passType);
}
copyScattedId(allIds, &inlink_info->allIds, input.getBatchSize());
CHECK_EQ(inlink_info->idIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
}
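// Illustration with assumed toy data (not taken from this patch): suppose
// inlink 0 holds two plain sequences (no subseq) of lengths 3 and 2,
// reversed_ is false, and seqLengthAndStart_[0] is sorted by decreasing length:
//   {(3, 0, 0, 0), (2, 3, 1, 1)}
// Then createInFrameInfo(0, input, passType) would yield
//   allIds   = {0, 3, 1, 4, 2}  // step 0 -> rows 0,3; step 1 -> rows 1,4; step 2 -> row 2
//   idIndex  = {0, 2, 4, 5}
//   numSeqs_ = {2, 2, 1}        // sequences still active at steps 0, 1, 2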
/* like createInFrameInfo, but for all realLayer of memoryFrameLines*/
......@@ -633,19 +700,20 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
sequenceStartPositions.push_back(0); // first element = 0
const int* starts = input.sequenceStartPositions->getData(false);
for (size_t i = 0; i < numSequences; ++i) {
int seqId = std::get<2>(seqLengthAndStart_[i]);
// memory info adopts the info of inlinks[0]
int seqId = std::get<2>(seqLengthAndStart_[0][i]);
for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
starts[seqId + 1] - starts[seqId]);
starts[seqId + 1] - starts[seqId]);
}
createSeqPos(sequenceStartPositions,
&(*memoryFrameLine).sequenceStartPositions);
} else { // for scatterAgentLayer
for (size_t i = 0; i < numSequences; ++i) {
allIds.push_back(std::get<2>(seqLengthAndStart_[i]));
allIds.push_back(std::get<2>(seqLengthAndStart_[0][i]));
}
}
// copy and check scatterId
......@@ -699,18 +767,19 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
for (auto& memoryFrameLine : memoryFrameLines_) {
if (!memoryFrameLine.rootLayer) continue;
Argument& bootArg = memoryFrameLine.rootLayer->getOutput();
size_t batchSize = memoryFrameLine.is_sequence ?
bootArg.getNumSequences() : bootArg.getBatchSize();
size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences()
: bootArg.getBatchSize();
if (numSequences) {
CHECK_EQ(numSequences, batchSize);
} else {
numSequences = batchSize;
}
}
CHECK(numSequences) << "Fail to get batch size in generation. "
"At least one of the Memory layer MUST have a layer that is NOT in "
"the layer group to boot it, and this boot layer is used to "
"decide batch_size in generation process.";
CHECK(numSequences)
<< "Fail to get batch size in generation. "
"At least one of the Memory layer MUST have a layer that is NOT in "
"the layer group to boot it, and this boot layer is used to "
"decide batch_size in generation process.";
return numSequences;
}
......@@ -732,7 +801,9 @@ void RecurrentGradientMachine::generateSequence() {
// connect boot frame memory links
std::vector<int> ids(numSequences);
for (size_t i = 0; i < numSequences; ++i) { ids[i] = i; }
for (size_t i = 0; i < numSequences; ++i) {
ids[i] = i;
}
for (auto& memoryFrameLine : memoryFrameLines_) {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
......@@ -756,7 +827,8 @@ void RecurrentGradientMachine::generateSequence() {
// init outArg
size_t resultNum = generator_.config.num_results_per_sample();
IVector::resizeOrCreate(generator_.outArg.ids,
IVector::resizeOrCreate(
generator_.outArg.ids,
generator_.config.max_num_frames() * numSequences * resultNum, false);
if (resultNum > 1) {
CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
......@@ -847,7 +919,9 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
// path.seqId = -1 indicates end of generation
// of an input sequence
finalPaths[seqIds_[j]].seqId = -1;
} else { scatterIds.push_back(j); }
} else {
scatterIds.push_back(j);
}
}
}
......@@ -856,13 +930,12 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
starts[0] = 0;
generator_.ids.clear();
for (size_t i = 0; i < batchSize; ++i) {
generator_.ids.insert(generator_.ids.end(),
finalPaths[i].ids.begin(),
generator_.ids.insert(generator_.ids.end(), finalPaths[i].ids.begin(),
finalPaths[i].ids.end());
starts[i + 1] = generator_.ids.size();
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
finalPaths[i].machineIdVec.begin(),
finalPaths[i].machineIdVec.end());
finalPaths[i].machineIdVec.begin(),
finalPaths[i].machineIdVec.end());
}
}
......@@ -920,9 +993,9 @@ void RecurrentGradientMachine::forwardFrame(int machineCur) {
}
}
void RecurrentGradientMachine::singlePathExpand(
Path& curPath, size_t curPathId, std::vector<Path>& newPaths,
size_t expandWidth) {
void RecurrentGradientMachine::singlePathExpand(Path& curPath, size_t curPathId,
std::vector<Path>& newPaths,
size_t expandWidth) {
int calc_id =
gDiyProbStart ? gDiyProbStart(curPath.ids.size(), curPath.ids.data()) : 0;
......@@ -946,19 +1019,20 @@ void RecurrentGradientMachine::singlePathExpand(
if (id == -1) break;
real newLogProb = generator_.config.log_prob() ? std::log(prob) : prob;
Path newPath(curPath, id, newLogProb,
curPathId /*machineId*/, k /*topIndex*/);
Path newPath(curPath, id, newLogProb, curPathId /*machineId*/,
k /*topIndex*/);
if (this->beamSearchCtrlCallbacks_) {
if (beamSearchCtrlCallbacks_->stopDetermineCandidates(
newPath.seqId, newPath.ids, newPath.probHistory)) return;
newPath.seqId, newPath.ids, newPath.probHistory))
return;
}
// outFrameLines_.size() > 1UL
if (dataArgsSize_) {
newPath.machineIdVec = curPath.machineIdVec;
newPath.machineIdVec.push_back(curPathId);
}
bool atEos = eosVec[index] == 1U ||
newPath.ids.size() >= (size_t)maxSequenceLength_;
bool atEos =
eosVec[index] == 1U || newPath.ids.size() >= (size_t)maxSequenceLength_;
// adjustNewPath
newPath.adjustProb(calc_id, atEos);
if (this->beamSearchCtrlCallbacks_) {
......@@ -966,16 +1040,18 @@ void RecurrentGradientMachine::singlePathExpand(
newPath.seqId, newPath.ids, newPath.probHistory, &newPath.logProb);
}
if (!newPath.isDropable()) {
atEos ? finalPaths_[curPath.seqId].push_back(newPath) :
newPaths.push_back(newPath);
atEos ? finalPaths_[curPath.seqId].push_back(newPath)
: newPaths.push_back(newPath);
}
} // for expandWidth
if (gDiyProbStop) { gDiyProbStop(calc_id); }
if (gDiyProbStop) {
gDiyProbStop(calc_id);
}
}
void RecurrentGradientMachine::beamExpand(
std::vector<Path>& paths, std::vector<Path>& newPaths) {
void RecurrentGradientMachine::beamExpand(std::vector<Path>& paths,
std::vector<Path>& newPaths) {
size_t candidatePathCount = paths.size();
// idVec.size() could be larger than candidatePathCount * beam,
// so the user can drop some nodes in a custom way.
......@@ -988,7 +1064,7 @@ void RecurrentGradientMachine::beamExpand(
int curSeqId = 0;
for (size_t j = 0; j <= candidatePathCount; j++) {
// expansions of a single sequence are all processed
curSeqId = (j < candidatePathCount? paths[j].seqId : curSeqId + 1);
curSeqId = (j < candidatePathCount ? paths[j].seqId : curSeqId + 1);
if (prevSeqId != -1 && curSeqId != prevSeqId) {
totalExpandCount += beamShrink(newPaths, prevSeqId, totalExpandCount);
}
......@@ -1000,11 +1076,14 @@ void RecurrentGradientMachine::beamExpand(
}
// Drop extra nodes to beam size.
size_t RecurrentGradientMachine::beamShrink(
std::vector<Path>& newPaths, size_t seqId, size_t totalExpandCount) {
size_t minNewPathSize = std::min(getBeamSize(),
newPaths.size() - totalExpandCount);
if (!minNewPathSize) { return 0; }
size_t RecurrentGradientMachine::beamShrink(std::vector<Path>& newPaths,
size_t seqId,
size_t totalExpandCount) {
size_t minNewPathSize =
std::min(getBeamSize(), newPaths.size() - totalExpandCount);
if (!minNewPathSize) {
return 0;
}
std::nth_element(newPaths.begin() + totalExpandCount,
newPaths.begin() + totalExpandCount + minNewPathSize,
newPaths.end(), Path::greaterPath);
......@@ -1017,11 +1096,8 @@ size_t RecurrentGradientMachine::beamShrink(
// Remove the already formed paths that are relatively short
finalPaths_[seqId].erase(
std::remove_if(finalPaths_[seqId].begin(),
finalPaths_[seqId].end(),
[&](Path& p) {
return p.logProb < minPathLogProb;
}),
std::remove_if(finalPaths_[seqId].begin(), finalPaths_[seqId].end(),
[&](Path& p) { return p.logProb < minPathLogProb; }),
finalPaths_[seqId].end());
for (auto p : finalPaths_[seqId]) {
if (minFinalPathLogProb_[seqId] > p.logProb) {
......@@ -1030,7 +1106,7 @@ size_t RecurrentGradientMachine::beamShrink(
}
if (finalPaths_[seqId].size() >= getBeamSize() &&
minFinalPathLogProb_[seqId] >= maxPathLogProb) {
minFinalPathLogProb_[seqId] >= maxPathLogProb) {
newPaths.resize(totalExpandCount);
return 0;
}
......@@ -1067,7 +1143,8 @@ void RecurrentGradientMachine::fillGenOutputs() {
// in beam search, only the top-1 generated result is kept here
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(), path.machineIdVec.end());
path.machineIdVec.begin(),
path.machineIdVec.end());
}
}
starts[i + 1] = generator_.ids.size();
......@@ -1091,21 +1168,21 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
void RecurrentGradientMachine::createDataOutlink(
std::vector<int>& machineIdVec) {
size_t seqNum = getBeamSize() > 1UL ?
finalPaths_.size() : finalPaths_[0].size();
size_t seqNum =
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0);
for (size_t i = 0; i < seqNum; ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() :
finalPaths_[0][i].ids.size();
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
: finalPaths_[0][i].ids.size();
starts[i + 1] = starts[i] + seqLen;
}
for (size_t i = 0; i < dataArgsSize_; i++) {
dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec,
starts, useGpu_, HPPL_STREAM_1, PASS_TEST);
dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec, starts, useGpu_,
HPPL_STREAM_1, PASS_TEST);
auto dataAgent = dynamic_cast<DataLayer*>(
outFrameLines_[i + 1].agentLayer.get());
auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent);
dataAgent->setData(dataArgs_[i]);
}
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "GradientMachine.h"
......@@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped.
*/
typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback;
const std::vector<real>&)> DropCallback;
/**
* @brief NormOrDropNodeCallback
......@@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path.
*/
typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback;
std::vector<real>&, real*)> NormOrDropNodeCallback;
/**
* @brief Register beam search control callbacks. Used for prediction.
......@@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation
int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec;
/**
......@@ -206,7 +205,10 @@ public:
/**
* @brief Path default ctor, first logProb is 0.
*/
Path() { logProb = 0; seqId = 0; }
Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/**
......@@ -319,21 +321,37 @@ protected:
};
std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows.
// Each inFrameLine (inlink) has its own info (the elements below),
// and all outFrameLines (outlinks) share the info of one inFrameLine,
// which is selected by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
Info info_;
std::vector<Info> info_;
// numSeqs_[i] is the number of sequences that are longer than i (for sequence
// data) or have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
// each inlink has a "std::vector<std::tuple<int, int, int, int>>" that denotes
// its sequence info:
// if hasSubSeq, each tuple is (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, each tuple is (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
void createInFrameInfo(const Argument& input, PassType passType);
// the id of the inlink that shares info with the outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayers of inFrameLines one time.
* If hasSubseq, it will also create scattered sequenceStartPositions information
* for all realLayers of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -363,6 +381,9 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences (subseq) among the batchSize samples
// else: max number of tokens among the batchSize samples (sentences)
int maxSequenceLength_;
bool useGpu_;
bool stopBeamSearch_;
......@@ -415,7 +436,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink(std::vector<int> & machineIdVec);
void createDataOutlink(std::vector<int>& machineIdVec);
/*
* @brief used in beam search, connect previous frame to form recurrent link
......
......@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else {
forwardImp(getInput(0), getInput(1));
......@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else {
backwardImp(callback, getInput(0), getInput(1));
}
......
......@@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_);
outputH_ * outputW_ * numFilters_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
......@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}
......
......@@ -52,8 +52,10 @@ public:
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]);
} else {
forwardImp(getInput(0));
......
......@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
integer_value(2)])
def process_subseq(settings, file_name):
for d in data:
yield d
@provider(input_types=[integer_value_sequence(10),
integer_value(2)])
def process_seq(settings, file_name):
for d in data:
seq = []
for subseq in d[0]:
seq += subseq
yield seq, d[1]
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(y):
inner_mem = memory(name="inner_rnn_state",
size=hidden_dim,
boot_layer=outer_mem)
return fc_layer(input=[y, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="inner_rnn_state")
inner_rnn_output = recurrent_group(
step=inner_step,
input=x)
last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it correctly. Current implementation requires that
# all the out links are from sequences. However, it does not report an error
# when the out links are not sequences.
return inner_rnn_output
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
mem = memory(name="rnn_state", size=hidden_dim)
return fc_layer(input=[y, mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="rnn_state")
out = recurrent_group(
step=step,
input=emb)
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
......@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT
using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer {
......@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim);
// vecW needs to be initialized; otherwise its value is indeterminate.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain();
for (int i = 0; i < num_passes; ++i) {
......@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str());
}
TEST(RecurrentGradientMachine, HasSubSequence) {
void test(const string& conf1, const string& conf2) {
int num_passes = 5;
real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes);
......@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete[] cost2;
}
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf");
}
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf");
}
int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) {
......
......@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
Argument& cpuInput = testCpu.dataLayer_->getOutput();
Argument& gpuInput = testGpu.dataLayer_->getOutput();
gpuInput.resizeAndCopyFrom(cpuInput, true);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
......@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
if (!matrix) {
matrix = Matrix::create(height, width, trans, useGpu);
} else {
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width);
}
}
......@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
} else {
CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
dynamic_cast<GpuSparseMatrix*>(matrix.get()));
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width, nnz, valueType, format);
}
}
......
......@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
} else if ((!useGpu) && (!cpuVectorT_)) {
cpuVectorT_ = VectorT<T>::create(size, false);
} else {
CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
this->resize(size, useGpu);
}
}
......
......@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
if (!dest) {
dest = src->clone(0, 0, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(src->getHeight(), src->getWidth());
}
dest->copyFrom(*src, stream);
......@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) {
CHECK_LE((size_t)startRow + copySize, src->getHeight());
int height = copySize;
int width = src->getWidth();
if (!dest) {
dest = src->clone(height, width, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(height, width);
}
MatrixPtr submat = src->subMatrix(startRow, copySize);
......@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
}
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
hl_stream_t stream) {
dataId = src.dataId;
......@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
resizeAndCopy(strs, src.strs, useGpu, stream);
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu) {
int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return size;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu,
hl_stream_t stream) {
......
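The Argument.cpp hunk above adds wrapper overloads so callers can pick between two contracts: pass a stream explicitly and synchronize it themselves, or use the shorter form that runs on HPPL_STREAM_DEFAULT and synchronizes internally. A brief sketch of both call patterns, with dst and src as illustrative placeholder Arguments:

// Convenience form: copies on HPPL_STREAM_DEFAULT and synchronizes inside.
dst.resizeAndCopyFrom(src, /* useGpu= */ false);

// Explicit-stream form: the copy is asynchronous, so the caller must
// synchronize the stream before touching the destination data.
dst.resizeAndCopyFrom(src, /* useGpu= */ false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);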
......@@ -203,13 +203,28 @@ struct Argument {
* startSeq: the sample id to start from
* copySize: how many samples need to be copied
* return value: how many samples are copied
* Note that when specifying the stream explicitly in this case,
* synchronize should also be called somewhere after this function
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
int32_t copySize, bool useGpu, hl_stream_t stream);
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
/*
* Same as the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronization is performed automatically
* inside it
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu);
void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
/*
* Same as the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronization is performed automatically
* inside it
*/
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
/*
@brief Concatenate several arguments into one and put the result into it.
......@@ -240,6 +255,15 @@ struct Argument {
/*
Get Sequence Length, startPositions and max Length according to input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLength is the maximal number of subsequences in each sequence.
*/
void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
......
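To make the tuple layout documented for getSeqLengthAndStart concrete, here is a small worked example with assumed data (not taken from this change):

// Assumed batch: sequenceStartPositions = {0, 3, 5}, i.e. sequence 0 covers
// rows [0, 3) and sequence 1 covers rows [3, 5), with no subsequences.
// getSeqLengthAndStart fills the vector with (seq_length, seq_start, seq_id,
// seq_id) tuples:
//   (3, 0, 0, 0)   // sequence 0: length 3, starting at row 0
//   (2, 3, 1, 1)   // sequence 1: length 2, starting at row 3
// and sets *maxSequenceLength to 3.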
......@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11;
// the id of the inlink that shares info with the outlinks; used in recurrent layer group
optional int32 target_inlinkid = 12;
}
message ModelConfig {
......
......@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed=False):
seq_reversed=False,
target_inlinkname=""):
global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn")
......@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for link in in_links:
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
......@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
......@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links,
out_links,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed)
seq_reversed,
target_inlinkname)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......