Commit 93006787 authored by emailweixu, committed by GitHub

Merge pull request #73 from reyoung/merge_icode

Merge Baidu Internal Changes.
@@ -9,6 +9,7 @@ Install PaddlePaddle
   :glob:
   install_*
+  internal/install_from_jumbo.md

Build from Source
-----------------
......
@@ -5,3 +5,4 @@ Cluster Train
   :glob:
   opensource/cluster_train.md
+  internal/index.md
@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc
.. toctree::
   :maxdepth: 1
+  :glob:
+
+  源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
+  使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
+  从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
   install/docker_install.rst
   install/ubuntu_install.rst
   cmake/index.rst
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
@@ -8,7 +8,7 @@ PaddlePaddle文档
* `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
-* `集群训练 <../doc/cluster/index.html>`_
+* `集群训练 <cluster/index.html>`_

开发指南
--------
......
@@ -194,8 +194,8 @@ public:
  virtual real evalImp(std::vector<Argument>& arguments) {
    CHECK_EQ(arguments.size(), (size_t)2);
    Argument output, label;
-   output.resizeAndCopyFrom(arguments[0], false);
-   label.resizeAndCopyFrom(arguments[1], false);
+   output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
+   label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
    hl_stream_synchronize(HPPL_STREAM_DEFAULT);
    CHECK(label.sequenceStartPositions);
    CHECK(label.ids);
......
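The same copy-then-synchronize pattern recurs in the layer changes further down: each input Argument is staged to CPU memory on HPPL_STREAM_DEFAULT, and the stream is synchronized once before the CPU implementation runs. A minimal hedged sketch of that pattern (member and helper names are taken from the hunks below; the two-input layout is assumed):

// Sketch only; mirrors the new code path in the CTCLayer/HuberTwoClass hunks below.
for (size_t i = 0; i < inputLayers_.size(); ++i) {
  // queue an asynchronous device-to-host copy on the default stream
  tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), /* useGpu */ false,
                                    HPPL_STREAM_DEFAULT);
}
// one synchronization point after all copies have been queued
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);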
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

-#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Flags.h"
@@ -291,6 +290,8 @@ void RecurrentGradientMachine::init(
  if (subModelConfig->evaluator_names_size() > 0) {
    evaluator_.reset(frames_[0]->makeEvaluator());
  }
+
+ targetInfoInlinkId_ = subModelConfig->target_inlinkid();
}

void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
@@ -325,7 +326,7 @@ void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
  for (int i = frames_.size(); i < numFrames; ++i) {
    std::unique_ptr<NeuralNetwork> frame(
        NeuralNetwork::newNeuralNetwork(subModelName_));
    frame->init(config_, subParamInitCb);

    for (auto& inFrameLine : inFrameLines_) {
@@ -382,6 +383,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
  size_t numSequences = input.getNumSequences();
  const int* starts = input.sequenceStartPositions->getData(false);
  bool hasSubseq = input.hasSubseq();
+
+ // In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
+ // same inframe info
+ bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1;
+
+ // Defaultly, share info with the first inlink
+ if (shareInlinkInfo) {
+   targetInfoInlinkId_ = 0;
+ }
+
  // check hasSubseq in both config and input are the same
  CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq);
@@ -394,9 +405,17 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
    CHECK_EQ((size_t)input1.getNumSequences(), numSequences);
    // check all inputs should have same hasSubseq flag
    CHECK_EQ(input.hasSubseq(), inFrameLines_[0].hasSubseq);
-   CHECK_EQ(input1.getBatchSize(), batchSize);
-   CHECK(std::equal(starts, starts + numSequences + 1,
-                    input1.sequenceStartPositions->getData(false)));
+
+   // if shareInlinkInfo, checks:
+   // 1. all inlinks have same number of total tokens
+   // 2. all inlinks have same number of tokens for each sentence of each
+   //    sample. If hasSubseq, one sample has multiple sentence, else, one
+   //    sample is one sentence
+   if (shareInlinkInfo) {
+     CHECK_EQ(input1.getBatchSize(), batchSize);
+     CHECK(std::equal(starts, starts + numSequences + 1,
+                      input1.sequenceStartPositions->getData(false)));
+   }
  }

  if (hasSubseq) {
@@ -408,19 +427,44 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
    for (size_t i = 1; i < inFrameLines_.size(); ++i) {
      const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
      CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences);
-     CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
-                      input1.subSequenceStartPositions->getData(false)));
+     if (shareInlinkInfo) {
+       CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
+                        input1.subSequenceStartPositions->getData(false)));
+     }
    }
  }

  seqLengthAndStart_.clear();
- input.getSeqLengthAndStart(&seqLengthAndStart_, &maxSequenceLength_);
+ info_.clear();
+ info_.resize(inFrameLines_.size());
+ seqLengthAndStart_.resize(inFrameLines_.size());
+
+ {
+   AsyncGpuBlock asyncGpuBlock;
+   // if shareInlinkInfo, only calculate info of the first inlink
+   // else, calculate info for each inlink
+   if (shareInlinkInfo) {
+     input.getSeqLengthAndStart(&seqLengthAndStart_[0], &maxSequenceLength_);
+     createInFrameInfo(0, input, passType);
+   } else {
+     for (size_t i = 0; i < inFrameLines_.size(); i++) {
+       const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
+       input1.getSeqLengthAndStart(&seqLengthAndStart_[i],
+                                   &maxSequenceLength_);
+       createInFrameInfo(i, input1, passType);
+     }
+   }
+
+   // inFrameLine select rows in real layer one time
+   for (size_t i = 0; i < inFrameLines_.size(); i++) {
+     int curInlinkId = shareInlinkInfo ? 0 : i;
+     selectRowsOneTime(inFrameLines_[i].inLayer, info_[curInlinkId].allIds,
+                       &(inFrameLines_[i].outArg), passType);
+   }
+ }
  resizeOrCreateFrames(maxSequenceLength_);
  resizeBootFrame(numSequences);

- AsyncGpuBlock asyncGpuBlock;
- createInFrameInfo(input, passType);
-
  for (auto& memoryFrameLine : memoryFrameLines_) {
    if (memoryFrameLine.rootAgent) {
      auto scatterAgent =
@@ -443,23 +487,29 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
    auto gatherAgent =
        dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
    CHECK_NOTNULL(gatherAgent);
-   gatherAgent->copyIdAndSequenceInfo(input, info_.allIds, info_.idIndex);
+   gatherAgent->copyIdAndSequenceInfo(input, info_[targetInfoInlinkId_].allIds,
+                                      info_[targetInfoInlinkId_].idIndex);
  }

  for (int i = 0; i < maxSequenceLength_; ++i) {
-   int idSize = info_.idIndex[i + 1] - info_.idIndex[i];
+   int idSize = 0;
    // connect in_links
-   for (auto& inFrameLine : inFrameLines_) {
+   for (size_t j = 0; j < inFrameLines_.size(); ++j) {
+     // idSize denotes the sum number of tokens in each length i
+     idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i];
+     InFrameLine inFrameLine = inFrameLines_[j];
      auto scatterAgent =
          dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
      scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
-                                         inFrameLine.outArg, info_.allIds,
-                                         info_.idIndex[i], idSize);
+                                         inFrameLine.outArg, info_[j].allIds,
+                                         info_[j].idIndex[i], idSize);
      if (hasSubseq) {
-       int size = info_.seqStartPosIndex[i + 1] - info_.seqStartPosIndex[i];
-       scatterAgent->setSequenceStartPositions(
-           info_.sequenceStartPositions, info_.seqStartPosIndex[i], size);
+       // size: the length of subsequence
+       int size =
+           info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i];
+       scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions,
+                                               info_[j].seqStartPosIndex[i],
+                                               size);
      }
    }
@@ -469,13 +519,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
          dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
      gatherAgent->addRealLayer(outFrameLine.frames[i]);
    }

    // connect memory links
+   // Adopt info_[0].idIndex because seq which has_subseq=True
+   // doesn't support Memory with !hasSubseq bootlayer;
+   // And inlinks that !hasSubSeq must have same inlink length.
+   idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i];
    for (auto& memoryFrameLine : memoryFrameLines_) {
      NeuralNetwork::connect(
          memoryFrameLine.agents[i],
          i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
-         idSize /*height of agent*/);
+         numSeqs_[i] /*height of agent*/);
    }
  }
@@ -560,62 +613,76 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
 * If hasSubseq, will also create scattered sequenceStartPositions infomation
 * for all realLayer of inFrameLines one time.
 */
-void RecurrentGradientMachine::createInFrameInfo(const Argument& input,
+void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
+                                                 const Argument& input,
                                                  PassType passType) {
  bool hasSubseq = input.hasSubseq();
+ // numSequences: # samples(sequences) in a batch
  size_t numSequences = input.getNumSequences();
  std::vector<int> allIds;
- info_.idIndex.clear();
- info_.idIndex.push_back(0);  // first idIndex = 0
- if (hasSubseq) {  // for sequenceScatterAgentLayer
+
+ numSeqs_.clear();
+ Info* inlink_info = &info_[inlinks_id];
+ inlink_info->idIndex.clear();
+ inlink_info->idIndex.push_back(0);  // first idIndex = 0
+ if (hasSubseq) {  // for sequenceScatterAgentLayer
+   // numSubSequences : all sentences within all samples(batch)
    size_t numSubSequences = input.getNumSubSequences();
    std::vector<int> sequenceStartPositions;
-   info_.seqStartPosIndex.clear();
-   info_.seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
+   inlink_info->seqStartPosIndex.clear();
+   inlink_info->seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
+   // maxSequenceLength_: max number of sentences(subseq) in allsamples
    for (int i = 0; i < maxSequenceLength_; ++i) {
      sequenceStartPositions.push_back(0);  // first element = 0
-     for (size_t j = 0; j < numSubSequences; ++j) {
-       if (std::get<3>(seqLengthAndStart_[j]) == i) {
-         int subSeqStart = std::get<1>(seqLengthAndStart_[j]);
-         int subSeqLength = std::get<0>(seqLengthAndStart_[j]);
+     int numSeqs = 0;
+     for (size_t j = 0; j < numSubSequences; ++j) {  // for each sentence
+       // seqLengthAndStart_[inlinks_id][j]:
+       // a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
+       if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
+         ++numSeqs;
+         // subseqstart: the cpuSubSequenceStartPositions of this subseq
+         int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
+         int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
          for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) {
            allIds.push_back(k);
          }
          sequenceStartPositions.push_back(sequenceStartPositions.back() +
                                           subSeqLength);
        }
      }
-     info_.idIndex.push_back(allIds.size());
-     info_.seqStartPosIndex.push_back(sequenceStartPositions.size());
+     inlink_info->idIndex.push_back(allIds.size());
+     inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
+     numSeqs_.push_back(numSeqs);
    }
    // inFrameLine create sequenceStartPositions one time
    CHECK_EQ(sequenceStartPositions.size(),
             maxSequenceLength_ + numSubSequences);
-   CHECK_EQ(info_.seqStartPosIndex.size(),
+   CHECK_EQ(inlink_info->seqStartPosIndex.size(),
             static_cast<size_t>(maxSequenceLength_ + 1));
-   createSeqPos(sequenceStartPositions, &info_.sequenceStartPositions);
+   createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
  } else {  // for scatterAgentLayer
    for (int i = 0; i < maxSequenceLength_; ++i) {
+     int numSeqs = 0;
      for (size_t j = 0; j < numSequences; ++j) {
-       int seqLength = std::get<0>(seqLengthAndStart_[j]);
+       int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
        if (i >= seqLength) {
          break;
        }
-       int seqStart = std::get<1>(seqLengthAndStart_[j]);
+       ++numSeqs;
+       int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
        allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
                                   : (seqStart + i));
      }
-     info_.idIndex.push_back(allIds.size());
+     inlink_info->idIndex.push_back(allIds.size());
+     numSeqs_.push_back(numSeqs);
    }
  }
  // copy and check scatterId
- copyScattedId(allIds, &info_.allIds, input.getBatchSize());
- CHECK_EQ(info_.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
- // inFrameLine select rows in real layer one time
- for (auto& inFrameLine : inFrameLines_) {
-   selectRowsOneTime(inFrameLine.inLayer, info_.allIds, &inFrameLine.outArg,
-                     passType);
- }
+ copyScattedId(allIds, &inlink_info->allIds, input.getBatchSize());
+ CHECK_EQ(inlink_info->idIndex.size(),
+          static_cast<size_t>(maxSequenceLength_ + 1));
}
/* like createInFrameInfo, but for all realLayer of memoryFrameLines*/
@@ -633,19 +700,20 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
    sequenceStartPositions.push_back(0);  // first element = 0
    const int* starts = input.sequenceStartPositions->getData(false);
    for (size_t i = 0; i < numSequences; ++i) {
-     int seqId = std::get<2>(seqLengthAndStart_[i]);
+     // memory info adopt info of inlinks[0]
+     int seqId = std::get<2>(seqLengthAndStart_[0][i]);
      for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) {
        allIds.push_back(k);
      }
      sequenceStartPositions.push_back(sequenceStartPositions.back() +
                                       starts[seqId + 1] - starts[seqId]);
    }
    createSeqPos(sequenceStartPositions,
                 &(*memoryFrameLine).sequenceStartPositions);
  } else {  // for scatterAgentLayer
    for (size_t i = 0; i < numSequences; ++i) {
-     allIds.push_back(std::get<2>(seqLengthAndStart_[i]));
+     allIds.push_back(std::get<2>(seqLengthAndStart_[0][i]));
    }
  }
  // copy and check scatterId
@@ -699,18 +767,19 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
  for (auto& memoryFrameLine : memoryFrameLines_) {
    if (!memoryFrameLine.rootLayer) continue;
    Argument& bootArg = memoryFrameLine.rootLayer->getOutput();
-   size_t batchSize = memoryFrameLine.is_sequence ?
-       bootArg.getNumSequences() : bootArg.getBatchSize();
+   size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences()
+                                                  : bootArg.getBatchSize();
    if (numSequences) {
      CHECK_EQ(numSequences, batchSize);
    } else {
      numSequences = batchSize;
    }
  }
- CHECK(numSequences) << "Fail to get batch size in generation. "
-     "At least one of the Memory layer MUST have a layer that is NOT in "
-     "the layer group to boot it, and this boot layer is used to "
-     "decide batch_size in generation process.";
+ CHECK(numSequences)
+     << "Fail to get batch size in generation. "
+        "At least one of the Memory layer MUST have a layer that is NOT in "
+        "the layer group to boot it, and this boot layer is used to "
+        "decide batch_size in generation process.";
  return numSequences;
}
@@ -732,7 +801,9 @@ void RecurrentGradientMachine::generateSequence() {
  // connect boot frame memory links
  std::vector<int> ids(numSequences);
- for (size_t i = 0; i < numSequences; ++i) { ids[i] = i; }
+ for (size_t i = 0; i < numSequences; ++i) {
+   ids[i] = i;
+ }
  for (auto& memoryFrameLine : memoryFrameLines_) {
    if (memoryFrameLine.rootAgent) {
      auto scatterAgent =
@@ -756,7 +827,8 @@ void RecurrentGradientMachine::generateSequence() {
  // init outArg
  size_t resultNum = generator_.config.num_results_per_sample();
- IVector::resizeOrCreate(generator_.outArg.ids,
-     generator_.config.max_num_frames() * numSequences * resultNum, false);
+ IVector::resizeOrCreate(
+     generator_.outArg.ids,
+     generator_.config.max_num_frames() * numSequences * resultNum, false);
  if (resultNum > 1) {
    CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
@@ -847,7 +919,9 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
        // path.seqId = -1 indicates end of generation
        // of an input sequence
        finalPaths[seqIds_[j]].seqId = -1;
-     } else { scatterIds.push_back(j); }
+     } else {
+       scatterIds.push_back(j);
+     }
    }
  }
@@ -856,13 +930,12 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
  starts[0] = 0;
  generator_.ids.clear();
  for (size_t i = 0; i < batchSize; ++i) {
-   generator_.ids.insert(generator_.ids.end(),
-                         finalPaths[i].ids.begin(),
+   generator_.ids.insert(generator_.ids.end(), finalPaths[i].ids.begin(),
                          finalPaths[i].ids.end());
    starts[i + 1] = generator_.ids.size();
    batchMachineIdVec_.insert(batchMachineIdVec_.end(),
                              finalPaths[i].machineIdVec.begin(),
                              finalPaths[i].machineIdVec.end());
  }
}
@@ -920,9 +993,9 @@ void RecurrentGradientMachine::forwardFrame(int machineCur) {
  }
}

-void RecurrentGradientMachine::singlePathExpand(
-    Path& curPath, size_t curPathId, std::vector<Path>& newPaths,
-    size_t expandWidth) {
+void RecurrentGradientMachine::singlePathExpand(Path& curPath, size_t curPathId,
+                                                std::vector<Path>& newPaths,
+                                                size_t expandWidth) {
  int calc_id =
      gDiyProbStart ? gDiyProbStart(curPath.ids.size(), curPath.ids.data()) : 0;
@@ -946,19 +1019,20 @@ void RecurrentGradientMachine::singlePathExpand(
    if (id == -1) break;

    real newLogProb = generator_.config.log_prob() ? std::log(prob) : prob;
-   Path newPath(curPath, id, newLogProb,
-                curPathId /*machineId*/, k /*topIndex*/);
+   Path newPath(curPath, id, newLogProb, curPathId /*machineId*/,
+                k /*topIndex*/);
    if (this->beamSearchCtrlCallbacks_) {
      if (beamSearchCtrlCallbacks_->stopDetermineCandidates(
-             newPath.seqId, newPath.ids, newPath.probHistory)) return;
+             newPath.seqId, newPath.ids, newPath.probHistory))
+       return;
    }
    // outFrameLines_.size() > 1UL
    if (dataArgsSize_) {
      newPath.machineIdVec = curPath.machineIdVec;
      newPath.machineIdVec.push_back(curPathId);
    }
-   bool atEos = eosVec[index] == 1U ||
-                newPath.ids.size() >= (size_t)maxSequenceLength_;
+   bool atEos =
+       eosVec[index] == 1U || newPath.ids.size() >= (size_t)maxSequenceLength_;
    // adjustNewPath
    newPath.adjustProb(calc_id, atEos);
    if (this->beamSearchCtrlCallbacks_) {
@@ -966,16 +1040,18 @@ void RecurrentGradientMachine::singlePathExpand(
          newPath.seqId, newPath.ids, newPath.probHistory, &newPath.logProb);
    }
    if (!newPath.isDropable()) {
-     atEos ? finalPaths_[curPath.seqId].push_back(newPath) :
-             newPaths.push_back(newPath);
+     atEos ? finalPaths_[curPath.seqId].push_back(newPath)
+           : newPaths.push_back(newPath);
    }
  }  // for expandWidth

- if (gDiyProbStop) { gDiyProbStop(calc_id); }
+ if (gDiyProbStop) {
+   gDiyProbStop(calc_id);
+ }
}
-void RecurrentGradientMachine::beamExpand(
-    std::vector<Path>& paths, std::vector<Path>& newPaths) {
+void RecurrentGradientMachine::beamExpand(std::vector<Path>& paths,
+                                          std::vector<Path>& newPaths) {
  size_t candidatePathCount = paths.size();
  // idVec.size() could be larger than candidatePathCount * beam,
  // so user can drop some node customly.
@@ -988,7 +1064,7 @@ void RecurrentGradientMachine::beamExpand(
  int curSeqId = 0;
  for (size_t j = 0; j <= candidatePathCount; j++) {
    // expansions of a single sequence are all processed
-   curSeqId = (j < candidatePathCount? paths[j].seqId : curSeqId + 1);
+   curSeqId = (j < candidatePathCount ? paths[j].seqId : curSeqId + 1);
    if (prevSeqId != -1 && curSeqId != prevSeqId) {
      totalExpandCount += beamShrink(newPaths, prevSeqId, totalExpandCount);
    }
@@ -1000,11 +1076,14 @@ void RecurrentGradientMachine::beamExpand(std::vector<Path>& paths,
}

// Drop extra nodes to beam size.
-size_t RecurrentGradientMachine::beamShrink(
-    std::vector<Path>& newPaths, size_t seqId, size_t totalExpandCount) {
-  size_t minNewPathSize = std::min(getBeamSize(),
-                                   newPaths.size() - totalExpandCount);
-  if (!minNewPathSize) { return 0; }
+size_t RecurrentGradientMachine::beamShrink(std::vector<Path>& newPaths,
+                                            size_t seqId,
+                                            size_t totalExpandCount) {
+  size_t minNewPathSize =
+      std::min(getBeamSize(), newPaths.size() - totalExpandCount);
+  if (!minNewPathSize) {
+    return 0;
+  }
  std::nth_element(newPaths.begin() + totalExpandCount,
                   newPaths.begin() + totalExpandCount + minNewPathSize,
                   newPaths.end(), Path::greaterPath);
@@ -1017,11 +1096,8 @@ size_t RecurrentGradientMachine::beamShrink(std::vector<Path>& newPaths,
  // Remove the already formed paths that are relatively short
  finalPaths_[seqId].erase(
-     std::remove_if(finalPaths_[seqId].begin(),
-                    finalPaths_[seqId].end(),
-                    [&](Path& p) {
-                      return p.logProb < minPathLogProb;
-                    }),
+     std::remove_if(finalPaths_[seqId].begin(), finalPaths_[seqId].end(),
+                    [&](Path& p) { return p.logProb < minPathLogProb; }),
      finalPaths_[seqId].end());
  for (auto p : finalPaths_[seqId]) {
    if (minFinalPathLogProb_[seqId] > p.logProb) {
@@ -1030,7 +1106,7 @@ size_t RecurrentGradientMachine::beamShrink(std::vector<Path>& newPaths,
  }
  if (finalPaths_[seqId].size() >= getBeamSize() &&
      minFinalPathLogProb_[seqId] >= maxPathLogProb) {
    newPaths.resize(totalExpandCount);
    return 0;
  }
@@ -1067,7 +1143,8 @@ void RecurrentGradientMachine::fillGenOutputs() {
      // in beam search, here only reserved the top 1 generated result
      // for out_links that are not the generated word indices.
      batchMachineIdVec_.insert(batchMachineIdVec_.end(),
-                               path.machineIdVec.begin(), path.machineIdVec.end());
+                               path.machineIdVec.begin(),
+                               path.machineIdVec.end());
    }
  }
  starts[i + 1] = generator_.ids.size();
@@ -1091,21 +1168,21 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
void RecurrentGradientMachine::createDataOutlink(
    std::vector<int>& machineIdVec) {
- size_t seqNum = getBeamSize() > 1UL ?
-     finalPaths_.size() : finalPaths_[0].size();
+ size_t seqNum =
+     getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
  std::vector<int> starts(seqNum + 1, 0);
  for (size_t i = 0; i < seqNum; ++i) {
-   size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() :
-       finalPaths_[0][i].ids.size();
+   size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
+                                       : finalPaths_[0][i].ids.size();
    starts[i + 1] = starts[i] + seqLen;
  }
  for (size_t i = 0; i < dataArgsSize_; i++) {
-   dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec,
-                       starts, useGpu_, HPPL_STREAM_1, PASS_TEST);
-   auto dataAgent = dynamic_cast<DataLayer*>(
-       outFrameLines_[i + 1].agentLayer.get());
+   dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec, starts, useGpu_,
+                       HPPL_STREAM_1, PASS_TEST);
+   auto dataAgent =
+       dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
    CHECK_NOTNULL(dataAgent);
    dataAgent->setData(dataArgs_[i]);
  }
......
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "GradientMachine.h"
@@ -101,7 +100,7 @@ public:
   * Return true if this prefix or candidate is expected to be dropped.
   */
  typedef std::function<bool(int seqId, const std::vector<int>&,
                             const std::vector<real>&)> DropCallback;
  /**
   * @brief NormOrDropNodeCallback
@@ -117,7 +116,7 @@ public:
   * The fourth parameter is the probability of the whole path.
   */
  typedef std::function<void(int seqId, const std::vector<int>&,
                             std::vector<real>&, real*)> NormOrDropNodeCallback;
  /**
   * @brief Register beam search control callbacks. Used for prediction.
@@ -192,7 +191,7 @@ public:
    int machineId;  // index of sample in frame
    int topIndex;   // index of MaxIdLayer output in one sample
    int seqId;      // index of sequence in batch generation

    std::vector<int> machineIdVec;
    /**
@@ -206,7 +205,10 @@ public:
    /**
     * @brief Path default ctor, first logProb is 0.
     */
-   Path() { logProb = 0; seqId = 0; }
+   Path() {
+     logProb = 0;
+     seqId = 0;
+   }
    explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }

    /**
@@ -319,21 +321,37 @@ protected:
  };
  std::vector<MemoryFrameLine> memoryFrameLines_;

- // All inFrameLines and outFrameLines have the same element as follows.
+ // Each inFrameLines(inlinks) has its own info(elements) below,
+ // and all outFrameLines(outlinks) share the info with one inFrameLine,
+ // which is assigned by targetInfoInlinkId_.
  struct Info {
    IVectorPtr allIds;         // scattered id of realLayer
    std::vector<int> idIndex;  // index of allIds
    ICpuGpuVectorPtr
        sequenceStartPositions;         // scattered sequenceStartPositions
    std::vector<int> seqStartPosIndex;  // index of sequenceStartPositions
  };
- Info info_;
+ std::vector<Info> info_;
+
+ // numSeqs_[i] is the number sequences which is longer than i (for sequence
+ // data) or has more than i subsequences (for subsequence data)
+ std::vector<int> numSeqs_;

- // if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
- // else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
- std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
+ // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
+ // its sequence info:
+ // if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
+ // else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
+ std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;

- void createInFrameInfo(const Argument& input, PassType passType);
+ // the id of inlink which share info with outlinks
+ int targetInfoInlinkId_;
+
+ /* create scattered id infomation for all realLayer of inFrameLines one time.
+  * If hasSubseq, will also create scattered sequenceStartPositions infomation
+  * for all realLayer of inFrameLines one time.
+  */
+ void createInFrameInfo(int inlinks_id, const Argument& input,
+                        PassType passType);

  void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
                             PassType passType);
@@ -363,6 +381,9 @@ protected:
  NeuralNetwork* rootNetwork_;
  bool reversed_;
+
+ // if hasSubseq: max number of sentences(subseq)in batchsize samples
+ // else: max number of tokens in batchsize samples(sentences)
  int maxSequenceLength_;
  bool useGpu_;
  bool stopBeamSearch_;
@@ -415,7 +436,7 @@ private:
   * @param machineIdVec : select a row of output matrix in each frame
   *                       that the generation process expanded.
   */
- void createDataOutlink(std::vector<int> & machineIdVec);
+ void createDataOutlink(std::vector<int>& machineIdVec);

  /*
   * @brief used in beam search, connect previous frame to form recurrent link
......
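As a concrete reading of the new bookkeeping, here is a hedged example; the batch below is invented and only the field names come from the header above:

// Two plain sequences of lengths 3 and 2, stored back to back at offsets 0 and 3.
// getSeqLengthAndStart() sorts plain sequences by length, so for inlink 0:
//   seqLengthAndStart_[0] == { {3, 0, 0, 0}, {2, 3, 1, 1} }  // (len, start, seqId, seqId)
// maxSequenceLength_ == 3, and numSeqs_[i] counts the sequences that reach step i:
//   numSeqs_ == { 2, 2, 1 }
// info_[0].idIndex then grows by numSeqs_[i] scattered ids per step:
//   info_[0].idIndex == { 0, 2, 4, 5 }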
@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
  Layer::forward(passType);
  if (useGpu_) {
    for (size_t i = 0; i < inputLayers_.size(); i++) {
-     tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
+     tmpCpuInput_[i].resizeAndCopyFrom(
+         getInput(i), false, HPPL_STREAM_DEFAULT);
    }
+   hl_stream_synchronize(HPPL_STREAM_DEFAULT);
    forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
  } else {
    forwardImp(getInput(0), getInput(1));
@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
  if (useGpu_) {
    backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
    const_cast<Argument&>(getInput(0)).
-       resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
+       resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
    const_cast<Argument&>(getInput(1)).
-       resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
+       resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
  } else {
    backwardImp(callback, getInput(0), getInput(1));
  }
......
@@ -248,7 +248,7 @@ void ConvOperator::forward() {
  CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
  checkFilterSize(ins_[1]->value);
  Matrix::resizeOrCreate(out_->value, batchSize,
-                        outputH_ * outputW_ * numFilters_);
+                        outputH_ * outputW_ * numFilters_, false, useGpu_);
  {
    AsyncGpuBlock block;
    for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
                               Matrix &cost) {
  if (useGpu_) {
    for (size_t i = 0; i < inputLayers_.size(); i++) {
-     tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
+     tmpCpuInput_[i].resizeAndCopyFrom(
+         getInput(i), false, HPPL_STREAM_DEFAULT);
    }
+   hl_stream_synchronize(HPPL_STREAM_DEFAULT);
  }
  forwardImpIn(output, label, cost);
}
......
@@ -52,8 +52,10 @@ public:
    Layer::forward(passType);
    if (useGpu_) {
      for (size_t i = 0; i < inputLayers_.size(); i++) {
-       tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
+       tmpCpuInput_[i].resizeAndCopyFrom(
+           getInput(i), false, HPPL_STREAM_DEFAULT);
      }
+     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
      forwardImp(tmpCpuInput_[0]);
    } else {
      forwardImp(getInput(0));
......
@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
    testLayer->forward(PASS_TEST);
    Argument out;
    out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
-   hl_stream_synchronize(HPPL_STREAM_DEFAULT);
    if (batchOut.value) {
      size_t dim = batchOut.value->getWidth();
      ASSERT_TRUE((bool)out.value);
@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
    testLayer->forward(PASS_TEST);
    Argument out;
    out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
-   hl_stream_synchronize(HPPL_STREAM_DEFAULT);
    if (batchOut.value) {
      size_t dim = batchOut.value->getWidth();
      ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
integer_value(2)])
def process_subseq(settings, file_name):
for d in data:
yield d
@provider(input_types=[integer_value_sequence(10),
integer_value(2)])
def process_seq(settings, file_name):
for d in data:
seq = []
for subseq in d[0]:
seq += subseq
yield seq, d[1]
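The two providers above feed the same underlying data: process_subseq yields each sample as nested sub-sequences, while process_seq flattens it. A quick, hedged sanity check of that equivalence (plain Python over the `data` list defined above; the expected flattened values are spelled out for clarity):

# Flattening each nested sample from `data` gives exactly what process_seq yields
# (label unchanged, sub-sequences concatenated in order).
for nested, label in data:
    flat = []
    for subseq in nested:
        flat += subseq
    assert (flat, label) in [([1, 3, 2, 4, 5, 2], 0), ([0, 2, 2, 5, 0, 1, 2], 1)]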
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierachical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(y):
        inner_mem = memory(name="inner_rnn_state",
                           size=hidden_dim,
                           boot_layer=outer_mem)
        return fc_layer(input=[y, inner_mem],
                        size=hidden_dim,
                        act=TanhActivation(),
                        bias_attr=True,
                        name="inner_rnn_state")

    inner_rnn_output = recurrent_group(
        step=inner_step,
        input=x)
    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")

    # "return last" should also work. But currently RecurrentGradientMachine
    # does not handle it correctly. Current implementation requires that
    # all the out links are from sequences. However, it does not report error
    # when the out links are not sequences.
    return inner_rnn_output
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
    mem = memory(name="rnn_state", size=hidden_dim)
    return fc_layer(input=[y, mem],
                    size=hidden_dim,
                    act=TanhActivation(),
                    bias_attr=True,
                    name="rnn_state")
out = recurrent_group(
step=step,
input=emb)
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>

+P_DECLARE_int32(seed);
+
using namespace paddle;  // NOLINT
using namespace std;     // NOLINT

class TrainerForTest : public paddle::Trainer {
@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
  CpuVector vecMomentum(dim);

  // vecW needs to be assigned, otherwise the variable is an uncertain value.
- vecW.zeroMem();
+ *ThreadLocalRand::getSeed() = FLAGS_seed;
+ vecW.randnorm(0, 0.1);

  trainer.startTrain();
  for (int i = 0; i < num_passes; ++i) {
@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
  rmDir(dir.c_str());
}

-TEST(RecurrentGradientMachine, HasSubSequence) {
+void test(const string& conf1, const string& conf2) {
  int num_passes = 5;
  real* cost1 = new real[num_passes];
- const string conf1 = "gserver/tests/sequence_layer_group.conf";
  const string dir1 = "gserver/tests/t1";
  CalCost(conf1, dir1, cost1, num_passes);

  real* cost2 = new real[num_passes];
- const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
  const string dir2 = "gserver/tests/t2";
  CalCost(conf2, dir2, cost2, num_passes);
@@ -109,6 +111,17 @@ void test(const string& conf1, const string& conf2) {
  delete[] cost2;
}

+TEST(RecurrentGradientMachine, HasSubSequence) {
+  test("gserver/tests/sequence_layer_group.conf",
+       "gserver/tests/sequence_nest_layer_group.conf");
+}
+
+TEST(RecurrentGradientMachine, rnn) {
+  test("gserver/tests/sequence_rnn.conf",
+       "gserver/tests/sequence_nest_rnn.conf");
+}
+
int main(int argc, char** argv) {
  if (paddle::version::isWithPyDataProvider()) {
    if (!paddle::version::isWithGpu()) {
......
@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
  Argument& cpuInput = testCpu.dataLayer_->getOutput();
  Argument& gpuInput = testGpu.dataLayer_->getOutput();
  gpuInput.resizeAndCopyFrom(cpuInput, true);
- hl_stream_synchronize(HPPL_STREAM_DEFAULT);

  const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
  const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
  if (!matrix) {
    matrix = Matrix::create(height, width, trans, useGpu);
  } else {
+   CHECK_EQ(matrix->useGpu(), useGpu);
    matrix->resize(height, width);
  }
}
@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
  } else {
    CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
          dynamic_cast<GpuSparseMatrix*>(matrix.get()));
+   CHECK_EQ(matrix->useGpu(), useGpu);
    matrix->resize(height, width, nnz, valueType, format);
  }
}
......
@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
  } else if ((!useGpu) && (!cpuVectorT_)) {
    cpuVectorT_ = VectorT<T>::create(size, false);
  } else {
+   CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
    this->resize(size, useGpu);
  }
}
......
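The checks added in the hunks above all enforce the same contract: a buffer created for one device may only be resized for that device. A hedged sketch of what now trips the check (the resizeOrCreate signature follows the Matrix.cpp hunk; the sizes below are made up):

MatrixPtr m;
// First call creates a CPU matrix (trans = false, useGpu = false).
Matrix::resizeOrCreate(m, 16, 32, false, false);
// Resizing on the same device is still fine.
Matrix::resizeOrCreate(m, 64, 32, false, false);
// Asking for a GPU matrix through the same pointer would now fail
// CHECK_EQ(matrix->useGpu(), useGpu) instead of silently reusing the CPU buffer:
// Matrix::resizeOrCreate(m, 64, 32, false, true);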
@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
  if (!dest) {
    dest = src->clone(0, 0, useGpu);
  } else {
+   CHECK_EQ(dest->useGpu(), useGpu);
    dest->resize(src->getHeight(), src->getWidth());
  }
  dest->copyFrom(*src, stream);
@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
                          hl_stream_t stream = HPPL_STREAM_DEFAULT) {
  if (src) {
    CHECK_LE((size_t)startRow + copySize, src->getHeight());
    int height = copySize;
    int width = src->getWidth();
    if (!dest) {
      dest = src->clone(height, width, useGpu);
    } else {
+     CHECK_EQ(dest->useGpu(), useGpu);
      dest->resize(height, width);
    }
    MatrixPtr submat = src->subMatrix(startRow, copySize);
@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
  }
}

+void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
+  resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+}
+
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
                                 hl_stream_t stream) {
  dataId = src.dataId;
@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
  resizeAndCopy(strs, src.strs, useGpu, stream);
}

+int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
+                                    int32_t copySize, bool useGpu) {
+  int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
+                                   HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  return size;
+}
+
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
                                    int32_t copySize, bool useGpu,
                                    hl_stream_t stream) {
......
@@ -203,13 +203,28 @@ struct Argument {
   * startSeq: the sample id of start
   * copySize: how many samples need to copy
   * return value: how many samples are copied
+  * Note that when specifying the stream explicitly in this case,
+  * synchronize should also be called somewhere after this function
   */
  int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
-                           int32_t copySize, bool useGpu = FLAGS_use_gpu,
-                           hl_stream_t stream = HPPL_STREAM_DEFAULT);
+                           int32_t copySize, bool useGpu, hl_stream_t stream);
+
+ /*
+  * same with the above function, except that the stream is
+  * HPPL_STREAM_DEFAULT and synchronize is automatically called
+  * inside it
+  */
+ int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
+                           int32_t copySize, bool useGpu = FLAGS_use_gpu);

- void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu,
-                        hl_stream_t stream = HPPL_STREAM_DEFAULT);
+ void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
+
+ /*
+  * same with the above function, except that the stream is
+  * HPPL_STREAM_DEFAULT and synchronize is automatically called
+  * inside it
+  */
+ void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);

  /*
  @brief Concatenate several arguments into one and put the result into it.
@@ -240,6 +255,15 @@ struct Argument {
  /*
  Get Sequence Length, startPositions and max Length according to input
+ 1. For sequence data:
+    Each tuple is (seq_length, seq_start, seq_id, seq_id)
+    The tuples are sorted according to seq_length or subseq_length
+    *maxSequenceLength is the maximal sequence length
+
+ 2. For subsequence data:
+    Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
+    The tuples are not sorted. They are in the original order.
+    *maxSequenceLenth is the maximal number of subsequences in each sequence.
  */
  void getSeqLengthAndStart(
      std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
......
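The split into a stream-taking overload and a stream-free overload is what the callers earlier in this diff rely on. A hedged usage sketch of the contract documented in this header (the Argument objects and the `src` name are illustrative):

Argument cpuCopy;
// Explicit-stream overload: the copy is queued asynchronously, so the caller
// must synchronize the stream before touching cpuCopy.
cpuCopy.resizeAndCopyFrom(src, /* useGpu */ false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);

// Default overload: copies on HPPL_STREAM_DEFAULT and synchronizes internally,
// so the data is ready as soon as the call returns.
cpuCopy.resizeAndCopyFrom(src, /* useGpu */ false);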
@@ -452,6 +452,9 @@ message SubModelConfig {
  repeated LinkConfig out_links = 10;
  optional GeneratorConfig generator = 11;

+ // the id of inlink which share info with outlinks, used in recurrent layer group
+ optional int32 target_inlinkid = 12;
}

message ModelConfig {
......
@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name,
                                            in_links,
-                                           seq_reversed=False):
+                                           seq_reversed=False,
+                                           target_inlinkname=""):
    global g_current_submodel
    config_assert(g_config.model_config.type == "recurrent_nn",
                  "RecurrentLayerGroup should be used only in recurrent_nn")
@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
    SubModelBegin(name)
    g_current_submodel.is_recurrent_layer_group = True
    g_current_submodel.reversed = seq_reversed
+   g_current_submodel.target_inlinkid = -1
    in_links_count = 0
-   for link in in_links:
+   for linkid, link in enumerate(in_links):
        if isinstance(link, basestring):
            name = link
            has_subseq = False
        else:
            name = link.link_name
            has_subseq = link.has_subseq
+       # assign target_inlinkid according to target_inlinkname
+       if target_inlinkname == name:
+           g_current_submodel.target_inlinkid = linkid
+
        if in_links_count == 0:
            in_links_has_subseq = has_subseq
        else:
@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
            SequenceScatterAgentLayer(name=name, size=layer.size)
        else:
            ScatterAgentLayer(name=name, size=layer.size)
+
        pair = g_current_submodel.in_links.add()
        pair.layer_name = layer_name
        pair.link_name = MakeLayerNameInSubmodel(name)
@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
                             in_links,
                             out_links,
                             generator=None,
+                            target_inlinkname="",
                             seq_reversed=False):
    RecurrentLayerGroupWithoutOutLinksBegin(name,
                                            in_links,
-                                           seq_reversed)
+                                           seq_reversed,
+                                           target_inlinkname)
    for link in out_links:
        RecurrentLayerGroupSetOutLink(link)
......
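Putting the configuration pieces together, a hedged sketch of how the new target_inlinkname parameter could be exercised at the config_parser level; only the keyword argument itself comes from this diff, while the layer names and the surrounding End call are assumptions for illustration:

# Illustrative only: two in-links feed one recurrent layer group, and the
# out-links reuse the scatter info of the in-link named "word", so
# g_current_submodel.target_inlinkid ends up pointing at that link.
RecurrentLayerGroupBegin("rnn_group",
                         in_links=["word", "extra_feature"],
                         out_links=["rnn_state"],
                         target_inlinkname="word",
                         seq_reversed=False)
# ... step-function layers would be defined here ...
RecurrentLayerGroupEnd("rnn_group")  # assumed counterpart call in config_parser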