diff --git a/.travis.yml b/.travis.yml index a78853e15b15825354ffbc6e1ca8ffb10c5257c6..d3dae9efd416bd92dde9b327424544da401f2025 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: cpp cache: ccache sudo: required dist: trusty +env: + - JOB=DOCS + - JOB=BUILD_AND_TEST addons: apt: packages: @@ -16,6 +19,7 @@ addons: - python2.7-dev - m4 - libprotobuf-dev + - doxygen - protobuf-compiler - python-protobuf - python-numpy @@ -24,12 +28,10 @@ addons: - libgflags-dev - libgtest-dev before_install: - - pip install wheel protobuf + - pip install wheel protobuf sphinx breathe recommonmark - sudo paddle/scripts/travis/before_install.sh script: - - paddle/scripts/travis/build.sh - - paddle/scripts/travis/unittest.sh - - paddle/scripts/travis/make_install.sh + - paddle/scripts/travis/main.sh notifications: email: on_success: change diff --git a/doc/build/contribute_to_paddle.md b/doc/build/contribute_to_paddle.md index b3d5fa7c9ff5f0b879e15b8017d029bc23e9ada8..10d5d86311333c223d1024f520fccddcb4c5050d 100644 --- a/doc/build/contribute_to_paddle.md +++ b/doc/build/contribute_to_paddle.md @@ -25,7 +25,7 @@ repo or just head straight to the command line: ```shell # Clone your fork to your local machine -git clone git@github.com:USERNAME/paddle.git +git clone git@github.com:USERNAME/Paddle.git ``` Then you can start to develop. @@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first: # see the current configured remote repository git remote -v # add upstream repository -git remote add upstream https://github.com/paddle/paddle.git +git remote add upstream https://github.com/baidu/Paddle.git # verify the new upstream git remote -v ``` diff --git a/doc/build/index.rst b/doc/build/index.rst index 2b983dceb2777e6c79ee1efaa977fef6e5c33ad6..d6d0d19e110fc35faec87da90d784a6775b9c91f 100644 --- a/doc/build/index.rst +++ b/doc/build/index.rst @@ -9,6 +9,7 @@ Install PaddlePaddle :glob: install_* + internal/install_from_jumbo.md Build from Source ----------------- diff --git a/doc/cluster/index.rst b/doc/cluster/index.rst index cf1ea97715402ec5b5b565a295ff4c1515df2570..9062f85f98d2981b5c8dcf8149e32c2ccdac77f4 100644 --- a/doc/cluster/index.rst +++ b/doc/cluster/index.rst @@ -5,3 +5,4 @@ Cluster Train :glob: opensource/cluster_train.md + internal/index.md diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst index 1583fce981fed64141acdccc0d89b46b63d13cc0..f902d1c995bc5045d62d0b2e279ee612f9dc7c93 100644 --- a/doc/ui/api/trainer_config_helpers/layers.rst +++ b/doc/ui/api/trainer_config_helpers/layers.rst @@ -245,10 +245,10 @@ addto_layer :members: addto_layer :noindex: -convex_comb_layer +linear_comb_layer ----------------- .. automodule:: paddle.trainer_config_helpers.layers - :members: convex_comb_layer + :members: linear_comb_layer :noindex: interpolation_layer @@ -280,7 +280,13 @@ tensor_layer .. automodule:: paddle.trainer_config_helpers.layers :members: tensor_layer :noindex: - + +cos_sim +------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: cos_sim + :noindex: + trans_layer ------------ .. automodule:: paddle.trainer_config_helpers.layers @@ -341,12 +347,6 @@ rank_cost :members: rank_cost :noindex: -cos_sim -------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cos_sim - :noindex: - crf_layer ----------------- .. 
automodule:: paddle.trainer_config_helpers.layers diff --git a/doc_cn/build_and_install/index.rst b/doc_cn/build_and_install/index.rst index e9182903c5f62b3a96c196d5ba1ebba2fd14f669..e21fc98c63dcdcda8202dad349ffe24dda62492d 100644 --- a/doc_cn/build_and_install/index.rst +++ b/doc_cn/build_and_install/index.rst @@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc .. toctree:: :maxdepth: 1 + :glob: + 源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst> + 使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst> + 从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst> install/docker_install.rst install/ubuntu_install.rst cmake/index.rst diff --git a/doc_cn/cluster/index.rst b/doc_cn/cluster/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..25313a9635bbf567a1aedfac3c379802d601d283 --- /dev/null +++ b/doc_cn/cluster/index.rst @@ -0,0 +1,11 @@ +集群训练 +======== + +* `集群训练 <../../doc/cluster/index.html>`_ + +.. toctree:: + :maxdepth: 2 + :glob: + + 集群训练(对内) + diff --git a/doc_cn/index.rst b/doc_cn/index.rst index 5f06463899f6b7b8166ff2cccd87b17817c6f5d1..6cf5588b5b34f5e80ea4c70cc364d4c6c42cce3d 100644 --- a/doc_cn/index.rst +++ b/doc_cn/index.rst @@ -8,7 +8,7 @@ PaddlePaddle文档 * `用户接口 `_ * `使用示例 `_ * `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_ -* `集群训练 <../doc/cluster/index.html>`_ +* `集群训练 `_ 开发指南 -------- diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 19c94b2453981301bcb632ecbe5d322369009973..c2dce1977bdf5daefb6c5b8032bb6b12563e9425 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) // APIs available after R4: -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ __macro(cudnnBatchNormalizationForwardTraining) \ __macro(cudnnBatchNormalizationForwardInference) \ @@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, double epsilon, real *savedMean, real *savedVar) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 if ((NULL != runningMean && NULL == runningInvVar) || (NULL == runningMean && NULL != runningInvVar)) { LOG(FATAL) << "runningMean and runningInvVar can be NULL " @@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, CHECK_SYNC("hl_batch_norm_forward_training failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, real *estimatedMean, real *estimatedInvVar, double epsilon) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); @@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, CHECK_SYNC("hl_batch_norm_forward_inference failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. 
" << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, double epsilon, real *savedMean, real *savedInvVar) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 if ((NULL != savedMean && NULL == savedInvVar) || (NULL == savedMean && NULL != savedInvVar)) { LOG(FATAL) << "savedMean and savedVar can be NULL " @@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward( t_resource.cudnn_handle, mode, &alpha, &beta, -#if CUDNN_VERSION >= 5000 &alpha, &beta, -#endif xDesc, input, dyDesc, outGrad, dxDesc, inGrad, bnDesc, scale, scaleGrad, biasGrad, epsilon, savedMean, savedInvVar)); CHECK_SYNC("hl_batch_norm_backward failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index fc003b7d6377d199c3859aa8b257de07478992af..ecc44944e4fa19b064fb0aa09d81e2143e5bc85d 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -19,6 +19,7 @@ limitations under the License. */ #include "hl_matrix_apply.cuh" #include "hl_sequence.h" #include "paddle/utils/Logging.h" +#include "hl_device_functions.cuh" DEFINE_MATRIX_UNARY_OP(Zero, a = 0); DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b); diff --git a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp b/paddle/gserver/evaluators/CTCErrorEvaluator.cpp index d0b1c0447d23d3e7072b2ee4f8e860708eb44bb2..e397c71c877dce8c34aefac12481373a037510f6 100644 --- a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp +++ b/paddle/gserver/evaluators/CTCErrorEvaluator.cpp @@ -194,8 +194,8 @@ public: virtual real evalImp(std::vector& arguments) { CHECK_EQ(arguments.size(), (size_t)2); Argument output, label; - output.resizeAndCopyFrom(arguments[0], false); - label.resizeAndCopyFrom(arguments[1], false); + output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT); + label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT); hl_stream_synchronize(HPPL_STREAM_DEFAULT); CHECK(label.sequenceStartPositions); CHECK(label.ids); @@ -207,7 +207,7 @@ public: real err = 0; err = editDistance( output.value->getData() + output.value->getWidth() * outputStarts[i], - output.value->getHeight(), output.value->getWidth(), + outputStarts[i+1] - outputStarts[i], output.value->getWidth(), label.ids->getData() + labelStarts[i], labelStarts[i + 1] - labelStarts[i]); @@ -224,6 +224,9 @@ public: for (const std::string& name : config_.input_layers()) { arguments.push_back(nn.getLayer(name)->getOutput()); } + } + + virtual void updateSamplesNum(const std::vector& arguments) { numSequences_ += arguments[1].getNumSequences(); } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 7bc5fe51813c94a4347118f1366370ec8b867e02..bf7aa1c8d89aeff396a4ed094fc36043defeb1a5 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - #include "paddle/utils/Stat.h" #include "paddle/utils/Util.h" #include "paddle/utils/Flags.h" @@ -291,6 +290,8 @@ void RecurrentGradientMachine::init( if (subModelConfig->evaluator_names_size() > 0) { evaluator_.reset(frames_[0]->makeEvaluator()); } + + targetInfoInlinkId_ = subModelConfig->target_inlinkid(); } void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { @@ -325,7 +326,7 @@ void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { for (int i = frames_.size(); i < numFrames; ++i) { std::unique_ptr frame( - NeuralNetwork::newNeuralNetwork(subModelName_)); + NeuralNetwork::newNeuralNetwork(subModelName_)); frame->init(config_, subParamInitCb); for (auto& inFrameLine : inFrameLines_) { @@ -382,6 +383,16 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, size_t numSequences = input.getNumSequences(); const int* starts = input.sequenceStartPositions->getData(false); bool hasSubseq = input.hasSubseq(); + + // In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the + // same inframe info + bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1; + + // Defaultly, share info with the first inlink + if (shareInlinkInfo) { + targetInfoInlinkId_ = 0; + } + // check hasSubseq in both config and input are the same CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq); @@ -394,9 +405,17 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, CHECK_EQ((size_t)input1.getNumSequences(), numSequences); // check all inputs should have same hasSubseq flag CHECK_EQ(input.hasSubseq(), inFrameLines_[0].hasSubseq); - CHECK_EQ(input1.getBatchSize(), batchSize); - CHECK(std::equal(starts, starts + numSequences + 1, - input1.sequenceStartPositions->getData(false))); + + // if shareInlinkInfo, checks: + // 1. all inlinks have same number of total tokens + // 2. all inlinks have same number of tokens for each sentence of each + // sample. 
If hasSubseq, one sample has multiple sentence, else, one + // sample is one sentence + if (shareInlinkInfo) { + CHECK_EQ(input1.getBatchSize(), batchSize); + CHECK(std::equal(starts, starts + numSequences + 1, + input1.sequenceStartPositions->getData(false))); + } } if (hasSubseq) { @@ -408,19 +427,46 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, for (size_t i = 1; i < inFrameLines_.size(); ++i) { const Argument& input1 = inFrameLines_[i].inLayer->getOutput(); CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences); - CHECK(std::equal(subStarts, subStarts + numSubSequences + 1, - input1.subSequenceStartPositions->getData(false))); + if (shareInlinkInfo) { + CHECK(std::equal(subStarts, subStarts + numSubSequences + 1, + input1.subSequenceStartPositions->getData(false))); + } } } - seqLengthAndStart_.clear(); - input.getSeqLengthAndStart(&seqLengthAndStart_, &maxSequenceLength_); + info_.clear(); + info_.resize(inFrameLines_.size()); + + seqInfos_.clear(); + seqInfos_.resize(inFrameLines_.size()); + + { + AsyncGpuBlock asyncGpuBlock; + // if shareInlinkInfo, only calculate info of the first inlink + // else, calculate info for each inlink + if (shareInlinkInfo) { + input.getSeqInfo(&seqInfos_[0]); + maxSequenceLength_ = seqInfos_[0][0].topLevelLength; + createInFrameInfo(0, input, passType); + } else { + for (size_t i = 0; i < inFrameLines_.size(); i++) { + const Argument& input1 = inFrameLines_[i].inLayer->getOutput(); + input1.getSeqInfo(&seqInfos_[i]); + maxSequenceLength_ = seqInfos_[i][0].topLevelLength; + createInFrameInfo(i, input1, passType); + } + } + + // inFrameLine select rows in real layer one time + for (size_t i = 0; i < inFrameLines_.size(); i++) { + int curInlinkId = shareInlinkInfo ? 0 : i; + selectRowsOneTime(inFrameLines_[i].inLayer, info_[curInlinkId].allIds, + &(inFrameLines_[i].outArg), passType); + } + } resizeOrCreateFrames(maxSequenceLength_); resizeBootFrame(numSequences); - AsyncGpuBlock asyncGpuBlock; - createInFrameInfo(input, passType); - for (auto& memoryFrameLine : memoryFrameLines_) { if (memoryFrameLine.rootAgent) { auto scatterAgent = @@ -443,23 +489,29 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, auto gatherAgent = dynamic_cast(outFrameLine.agentLayer.get()); CHECK_NOTNULL(gatherAgent); - gatherAgent->copyIdAndSequenceInfo(input, info_.allIds, info_.idIndex); + gatherAgent->copyIdAndSequenceInfo(input, info_[targetInfoInlinkId_].allIds, + info_[targetInfoInlinkId_].idIndex); } for (int i = 0; i < maxSequenceLength_; ++i) { - int idSize = info_.idIndex[i + 1] - info_.idIndex[i]; - + int idSize = 0; // connect in_links - for (auto& inFrameLine : inFrameLines_) { + for (size_t j = 0; j < inFrameLines_.size(); ++j) { + // idSize denotes the sum number of tokens in each length i + idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i]; + InFrameLine inFrameLine = inFrameLines_[j]; auto scatterAgent = dynamic_cast(inFrameLine.agents[i].get()); scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer, - inFrameLine.outArg, info_.allIds, - info_.idIndex[i], idSize); + inFrameLine.outArg, info_[j].allIds, + info_[j].idIndex[i], idSize); if (hasSubseq) { - int size = info_.seqStartPosIndex[i + 1] - info_.seqStartPosIndex[i]; - scatterAgent->setSequenceStartPositions( - info_.sequenceStartPositions, info_.seqStartPosIndex[i], size); + // size: the length of subsequence + int size = + info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i]; + 
scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions, + info_[j].seqStartPosIndex[i], + size); } } @@ -469,13 +521,16 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, dynamic_cast(outFrameLine.agentLayer.get()); gatherAgent->addRealLayer(outFrameLine.frames[i]); } - // connect memory links + // Adopt info_[0].idIndex because seq which has_subseq=True + // doesn't support Memory with !hasSubseq bootlayer; + // And inlinks that !hasSubSeq must have same inlink length. + idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i]; for (auto& memoryFrameLine : memoryFrameLines_) { NeuralNetwork::connect( memoryFrameLine.agents[i], i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1], - idSize /*height of agent*/); + numSeqs_[i] /*height of agent*/); } } @@ -560,62 +615,77 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() { * If hasSubseq, will also create scattered sequenceStartPositions infomation * for all realLayer of inFrameLines one time. */ -void RecurrentGradientMachine::createInFrameInfo(const Argument& input, + +void RecurrentGradientMachine::createInFrameInfo(int inlinkId, + const Argument& input, PassType passType) { bool hasSubseq = input.hasSubseq(); + // numSequences: # samples(sequences) in a batch size_t numSequences = input.getNumSequences(); std::vector allIds; - info_.idIndex.clear(); - info_.idIndex.push_back(0); // first idIndex = 0 - if (hasSubseq) { // for sequenceScatterAgentLayer - size_t numSubSequences = input.getNumSubSequences(); - std::vector sequenceStartPositions; - info_.seqStartPosIndex.clear(); - info_.seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0 - for (int i = 0; i < maxSequenceLength_; ++i) { - sequenceStartPositions.push_back(0); // first element = 0 - for (size_t j = 0; j < numSubSequences; ++j) { - if (std::get<3>(seqLengthAndStart_[j]) == i) { - int subSeqStart = std::get<1>(seqLengthAndStart_[j]); - int subSeqLength = std::get<0>(seqLengthAndStart_[j]); - for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) { - allIds.push_back(k); - } - sequenceStartPositions.push_back(sequenceStartPositions.back() + - subSeqLength); - } - } - info_.idIndex.push_back(allIds.size()); - info_.seqStartPosIndex.push_back(sequenceStartPositions.size()); + + auto& seqInfo = seqInfos_[inlinkId]; + + numSeqs_.clear(); + Info* inlinkInfo = &info_[inlinkId]; + inlinkInfo->idIndex.clear(); + inlinkInfo->idIndex.push_back(0); // first idIndex = 0 + + std::vector sequenceStartPositions; + const int* subSequenceStartPositions = nullptr; + + if (hasSubseq) { // for sequenceScatterAgentLayer + subSequenceStartPositions = + input.subSequenceStartPositions->getData(false); + inlinkInfo->seqStartPosIndex.clear(); + inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0 + } + // maxSequenceLength_: max topLevelLength in allsamples + for (int i = 0; i < maxSequenceLength_; ++i) { + if (hasSubseq) { + sequenceStartPositions.push_back(0); // first element = 0 } - // inFrameLine create sequenceStartPositions one time - CHECK_EQ(sequenceStartPositions.size(), - maxSequenceLength_ + numSubSequences); - CHECK_EQ(info_.seqStartPosIndex.size(), - static_cast(maxSequenceLength_ + 1)); - createSeqPos(sequenceStartPositions, &info_.sequenceStartPositions); - } else { // for scatterAgentLayer - for (int i = 0; i < maxSequenceLength_; ++i) { - for (size_t j = 0; j < numSequences; ++j) { - int seqLength = std::get<0>(seqLengthAndStart_[j]); - if (i >= seqLength) { - break; + int numSeqs = 
0; + for (size_t j = 0; j < numSequences; ++j) { + int seqLength = seqInfo[j].topLevelLength; + if (i >= seqLength) { + break; + } + ++numSeqs; + if (hasSubseq) { + int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i]; + int subSeqEnd = + subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1]; + for (int k = subSeqStart; k < subSeqEnd; ++k) { + allIds.push_back(k); } - int seqStart = std::get<1>(seqLengthAndStart_[j]); + sequenceStartPositions.push_back(sequenceStartPositions.back() + + subSeqEnd - subSeqStart); + } else { + int seqStart = seqInfo[j].seqStart; allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i) : (seqStart + i)); } - info_.idIndex.push_back(allIds.size()); + } + inlinkInfo->idIndex.push_back(allIds.size()); + numSeqs_.push_back(numSeqs); + if (hasSubseq) { + inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size()); } } - // copy and check scatterId - copyScattedId(allIds, &info_.allIds, input.getBatchSize()); - CHECK_EQ(info_.idIndex.size(), static_cast(maxSequenceLength_ + 1)); - // inFrameLine select rows in real layer one time - for (auto& inFrameLine : inFrameLines_) { - selectRowsOneTime(inFrameLine.inLayer, info_.allIds, &inFrameLine.outArg, - passType); + if (hasSubseq) { + // inFrameLine create sequenceStartPositions one time + CHECK_EQ(sequenceStartPositions.size(), + maxSequenceLength_ + input.getNumSubSequences()); + CHECK_EQ(inlinkInfo->seqStartPosIndex.size(), + static_cast(maxSequenceLength_ + 1)); + createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions); } + + // copy and check scatterId + copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize()); + CHECK_EQ(inlinkInfo->idIndex.size(), + static_cast(maxSequenceLength_ + 1)); } /* like createInFrameInfo, but for all realLayer of memoryFrameLines*/ @@ -633,19 +703,20 @@ void RecurrentGradientMachine::createMemoryFrameInfo( sequenceStartPositions.push_back(0); // first element = 0 const int* starts = input.sequenceStartPositions->getData(false); for (size_t i = 0; i < numSequences; ++i) { - int seqId = std::get<2>(seqLengthAndStart_[i]); + // memory info adopt info of inlinks[0] + int seqId = seqInfos_[0][i].seqId; for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) { allIds.push_back(k); } sequenceStartPositions.push_back(sequenceStartPositions.back() + - starts[seqId + 1] - starts[seqId]); + starts[seqId + 1] - starts[seqId]); } createSeqPos(sequenceStartPositions, &(*memoryFrameLine).sequenceStartPositions); } else { // for scatterAgentLayer for (size_t i = 0; i < numSequences; ++i) { - allIds.push_back(std::get<2>(seqLengthAndStart_[i])); + allIds.push_back(seqInfos_[0][i].seqId); } } // copy and check scatterId @@ -699,18 +770,19 @@ size_t RecurrentGradientMachine::getGenBatchSize() { for (auto& memoryFrameLine : memoryFrameLines_) { if (!memoryFrameLine.rootLayer) continue; Argument& bootArg = memoryFrameLine.rootLayer->getOutput(); - size_t batchSize = memoryFrameLine.is_sequence ? - bootArg.getNumSequences() : bootArg.getBatchSize(); + size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences() + : bootArg.getBatchSize(); if (numSequences) { CHECK_EQ(numSequences, batchSize); } else { numSequences = batchSize; } } - CHECK(numSequences) << "Fail to get batch size in generation. 
" - "At least one of the Memory layer MUST have a layer that is NOT in " - "the layer group to boot it, and this boot layer is used to " - "decide batch_size in generation process."; + CHECK(numSequences) + << "Fail to get batch size in generation. " + "At least one of the Memory layer MUST have a layer that is NOT in " + "the layer group to boot it, and this boot layer is used to " + "decide batch_size in generation process."; return numSequences; } @@ -732,7 +804,9 @@ void RecurrentGradientMachine::generateSequence() { // connect boot frame memory links std::vector ids(numSequences); - for (size_t i = 0; i < numSequences; ++i) { ids[i] = i; } + for (size_t i = 0; i < numSequences; ++i) { + ids[i] = i; + } for (auto& memoryFrameLine : memoryFrameLines_) { if (memoryFrameLine.rootAgent) { auto scatterAgent = @@ -756,7 +830,8 @@ void RecurrentGradientMachine::generateSequence() { // init outArg size_t resultNum = generator_.config.num_results_per_sample(); - IVector::resizeOrCreate(generator_.outArg.ids, + IVector::resizeOrCreate( + generator_.outArg.ids, generator_.config.max_num_frames() * numSequences * resultNum, false); if (resultNum > 1) { CHECK_LE(resultNum, static_cast(generator_.config.beam_size())); @@ -847,7 +922,9 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { // path.seqId = -1 indicates end of generation // of an input sequence finalPaths[seqIds_[j]].seqId = -1; - } else { scatterIds.push_back(j); } + } else { + scatterIds.push_back(j); + } } } @@ -856,13 +933,12 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { starts[0] = 0; generator_.ids.clear(); for (size_t i = 0; i < batchSize; ++i) { - generator_.ids.insert(generator_.ids.end(), - finalPaths[i].ids.begin(), + generator_.ids.insert(generator_.ids.end(), finalPaths[i].ids.begin(), finalPaths[i].ids.end()); starts[i + 1] = generator_.ids.size(); batchMachineIdVec_.insert(batchMachineIdVec_.end(), - finalPaths[i].machineIdVec.begin(), - finalPaths[i].machineIdVec.end()); + finalPaths[i].machineIdVec.begin(), + finalPaths[i].machineIdVec.end()); } } @@ -920,9 +996,9 @@ void RecurrentGradientMachine::forwardFrame(int machineCur) { } } -void RecurrentGradientMachine::singlePathExpand( - Path& curPath, size_t curPathId, std::vector& newPaths, - size_t expandWidth) { +void RecurrentGradientMachine::singlePathExpand(Path& curPath, size_t curPathId, + std::vector& newPaths, + size_t expandWidth) { int calc_id = gDiyProbStart ? gDiyProbStart(curPath.ids.size(), curPath.ids.data()) : 0; @@ -946,19 +1022,20 @@ void RecurrentGradientMachine::singlePathExpand( if (id == -1) break; real newLogProb = generator_.config.log_prob() ? 
std::log(prob) : prob; - Path newPath(curPath, id, newLogProb, - curPathId /*machineId*/, k /*topIndex*/); + Path newPath(curPath, id, newLogProb, curPathId /*machineId*/, + k /*topIndex*/); if (this->beamSearchCtrlCallbacks_) { if (beamSearchCtrlCallbacks_->stopDetermineCandidates( - newPath.seqId, newPath.ids, newPath.probHistory)) return; + newPath.seqId, newPath.ids, newPath.probHistory)) + return; } // outFrameLines_.size() > 1UL if (dataArgsSize_) { newPath.machineIdVec = curPath.machineIdVec; newPath.machineIdVec.push_back(curPathId); } - bool atEos = eosVec[index] == 1U || - newPath.ids.size() >= (size_t)maxSequenceLength_; + bool atEos = + eosVec[index] == 1U || newPath.ids.size() >= (size_t)maxSequenceLength_; // adjustNewPath newPath.adjustProb(calc_id, atEos); if (this->beamSearchCtrlCallbacks_) { @@ -966,16 +1043,18 @@ void RecurrentGradientMachine::singlePathExpand( newPath.seqId, newPath.ids, newPath.probHistory, &newPath.logProb); } if (!newPath.isDropable()) { - atEos ? finalPaths_[curPath.seqId].push_back(newPath) : - newPaths.push_back(newPath); + atEos ? finalPaths_[curPath.seqId].push_back(newPath) + : newPaths.push_back(newPath); } } // for expandWidth - if (gDiyProbStop) { gDiyProbStop(calc_id); } + if (gDiyProbStop) { + gDiyProbStop(calc_id); + } } -void RecurrentGradientMachine::beamExpand( - std::vector& paths, std::vector& newPaths) { +void RecurrentGradientMachine::beamExpand(std::vector& paths, + std::vector& newPaths) { size_t candidatePathCount = paths.size(); // idVec.size() could be larger than candidatePathCount * beam, // so user can drop some node customly. @@ -988,7 +1067,7 @@ void RecurrentGradientMachine::beamExpand( int curSeqId = 0; for (size_t j = 0; j <= candidatePathCount; j++) { // expansions of a single sequence are all processed - curSeqId = (j < candidatePathCount? paths[j].seqId : curSeqId + 1); + curSeqId = (j < candidatePathCount ? paths[j].seqId : curSeqId + 1); if (prevSeqId != -1 && curSeqId != prevSeqId) { totalExpandCount += beamShrink(newPaths, prevSeqId, totalExpandCount); } @@ -1000,11 +1079,14 @@ void RecurrentGradientMachine::beamExpand( } // Drop extra nodes to beam size. 
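For readers new to this file, the `beamShrink` routine reformatted just below keeps only the `beam_size` highest-probability candidate paths for one sequence. A minimal standalone sketch of that top-k pruning step, using a simplified `Path` stand-in rather than the actual Paddle class:

```cpp
#include <algorithm>
#include <vector>

// Simplified stand-in for the real Path class; only logProb matters here.
struct Path {
  double logProb;
};

// Keep only the `beam` highest-probability candidates among
// newPaths[start..end), mirroring the nth_element + resize pattern used by
// beamShrink. Assumes start <= newPaths.size().
size_t shrinkToBeam(std::vector<Path>& newPaths, size_t start, size_t beam) {
  size_t keep = std::min(beam, newPaths.size() - start);
  if (keep == 0) return 0;
  // Partition so the `keep` best paths occupy newPaths[start .. start+keep).
  std::nth_element(
      newPaths.begin() + start, newPaths.begin() + start + keep,
      newPaths.end(),
      [](const Path& a, const Path& b) { return a.logProb > b.logProb; });
  newPaths.resize(start + keep);
  return keep;
}
```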
-size_t RecurrentGradientMachine::beamShrink( - std::vector& newPaths, size_t seqId, size_t totalExpandCount) { - size_t minNewPathSize = std::min(getBeamSize(), - newPaths.size() - totalExpandCount); - if (!minNewPathSize) { return 0; } +size_t RecurrentGradientMachine::beamShrink(std::vector& newPaths, + size_t seqId, + size_t totalExpandCount) { + size_t minNewPathSize = + std::min(getBeamSize(), newPaths.size() - totalExpandCount); + if (!minNewPathSize) { + return 0; + } std::nth_element(newPaths.begin() + totalExpandCount, newPaths.begin() + totalExpandCount + minNewPathSize, newPaths.end(), Path::greaterPath); @@ -1017,11 +1099,8 @@ size_t RecurrentGradientMachine::beamShrink( // Remove the already formed paths that are relatively short finalPaths_[seqId].erase( - std::remove_if(finalPaths_[seqId].begin(), - finalPaths_[seqId].end(), - [&](Path& p) { - return p.logProb < minPathLogProb; - }), + std::remove_if(finalPaths_[seqId].begin(), finalPaths_[seqId].end(), + [&](Path& p) { return p.logProb < minPathLogProb; }), finalPaths_[seqId].end()); for (auto p : finalPaths_[seqId]) { if (minFinalPathLogProb_[seqId] > p.logProb) { @@ -1030,7 +1109,7 @@ size_t RecurrentGradientMachine::beamShrink( } if (finalPaths_[seqId].size() >= getBeamSize() && - minFinalPathLogProb_[seqId] >= maxPathLogProb) { + minFinalPathLogProb_[seqId] >= maxPathLogProb) { newPaths.resize(totalExpandCount); return 0; } @@ -1067,7 +1146,8 @@ void RecurrentGradientMachine::fillGenOutputs() { // in beam search, here only reserved the top 1 generated result // for out_links that are not the generated word indices. batchMachineIdVec_.insert(batchMachineIdVec_.end(), - path.machineIdVec.begin(), path.machineIdVec.end()); + path.machineIdVec.begin(), + path.machineIdVec.end()); } } starts[i + 1] = generator_.ids.size(); @@ -1091,21 +1171,21 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) { void RecurrentGradientMachine::createDataOutlink( std::vector& machineIdVec) { - size_t seqNum = getBeamSize() > 1UL ? - finalPaths_.size() : finalPaths_[0].size(); + size_t seqNum = + getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size(); std::vector starts(seqNum + 1, 0); for (size_t i = 0; i < seqNum; ++i) { - size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() : - finalPaths_[0][i].ids.size(); + size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() + : finalPaths_[0][i].ids.size(); starts[i + 1] = starts[i] + seqLen; } for (size_t i = 0; i < dataArgsSize_; i++) { - dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec, - starts, useGpu_, HPPL_STREAM_1, PASS_TEST); + dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec, starts, useGpu_, + HPPL_STREAM_1, PASS_TEST); - auto dataAgent = dynamic_cast( - outFrameLines_[i + 1].agentLayer.get()); + auto dataAgent = + dynamic_cast(outFrameLines_[i + 1].agentLayer.get()); CHECK_NOTNULL(dataAgent); dataAgent->setData(dataArgs_[i]); } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index cc49d13952323db6e514ea437552d076187d91e2..6328213793ed6ca39214ec00124570ecb1ce273b 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - #pragma once #include "GradientMachine.h" @@ -101,7 +100,7 @@ public: * Return true if this prefix or candidate is expected to be dropped. */ typedef std::function&, - const std::vector&)> DropCallback; + const std::vector&)> DropCallback; /** * @brief NormOrDropNodeCallback * The fourth parameter is the probability of the whole path. */ typedef std::function&, - std::vector&, real*)> NormOrDropNodeCallback; + std::vector&, real*)> NormOrDropNodeCallback; /** * @brief Register beam search control callbacks. Used for prediction. @@ -192,7 +191,7 @@ public: int machineId; // index of sample in frame int topIndex; // index of MaxIdLayer output in one sample - int seqId; // index of sequence in batch generation + int seqId; // index of sequence in batch generation std::vector machineIdVec; /** * @brief Path default ctor, first logProb is 0. */ - Path() { logProb = 0; seqId = 0; } + Path() { + logProb = 0; + seqId = 0; + } explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; } /** @@ -319,21 +321,33 @@ protected: }; std::vector memoryFrameLines_; - // All inFrameLines and outFrameLines have the same element as follows. + // Each inFrameLine (inlink) has its own info (elements) below, + // and all outFrameLines (outlinks) share the info with one inFrameLine, + // which is assigned by targetInfoInlinkId_. struct Info { IVectorPtr allIds; // scattered id of realLayer std::vector idIndex; // index of allIds ICpuGpuVectorPtr - sequenceStartPositions; // scattered sequenceStartPositions + sequenceStartPositions; // scattered sequenceStartPositions std::vector seqStartPosIndex; // index of sequenceStartPositions }; - Info info_; + std::vector info_; + + // numSeqs_[i] is the number of sequences that are longer than i (for sequence + // data) or have more than i subsequences (for subsequence data) + std::vector numSeqs_; - // if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex) - // else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex) - std::vector> seqLengthAndStart_; + std::vector> seqInfos_; - void createInFrameInfo(const Argument& input, PassType passType); + // the id of the inlink which shares info with the outlinks + int targetInfoInlinkId_; + + /* create scattered id information for all realLayer of inFrameLines one time. + * If hasSubseq, will also create scattered sequenceStartPositions information + * for all realLayer of inFrameLines one time. + */ + void createInFrameInfo(int inlinks_id, const Argument& input, + PassType passType); void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, PassType passType); @@ -363,6 +377,9 @@ protected: NeuralNetwork* rootNetwork_; bool reversed_; + + // if hasSubseq: max number of sentences (subseq) in batchSize samples + // else: max number of tokens in batchSize samples (sentences) int maxSequenceLength_; bool useGpu_; bool stopBeamSearch_; @@ -415,7 +432,7 @@ private: * @param machineIdVec : select a row of output matrix in each frame * that the generation process expanded. 
*/ - void createDataOutlink(std::vector & machineIdVec); + void createDataOutlink(std::vector& machineIdVec); /* * @brief used in beam search, connect previous frame to form recurrent link diff --git a/paddle/gserver/layers/CTCLayer.cpp b/paddle/gserver/layers/CTCLayer.cpp index db1450694ecf7608fb37790e841b967288378e1f..6b9ffc5c749fb45be567881b8e625b48e28f69b4 100644 --- a/paddle/gserver/layers/CTCLayer.cpp +++ b/paddle/gserver/layers/CTCLayer.cpp @@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) { Layer::forward(passType); if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); + tmpCpuInput_[i].resizeAndCopyFrom( + getInput(i), false, HPPL_STREAM_DEFAULT); } + hl_stream_synchronize(HPPL_STREAM_DEFAULT); forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]); } else { forwardImp(getInput(0), getInput(1)); @@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) { if (useGpu_) { backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]); const_cast(getInput(0)). - resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1); + resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT); const_cast(getInput(1)). - resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1); + resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT); } else { backwardImp(callback, getInput(0), getInput(1)); } diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp index d08c422764e5642816a94fc55b5b67445ffb42f7..8c72c1778451dfddbaa740921cd08cf73fe56785 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/gserver/layers/ConvOperator.cpp @@ -248,7 +248,7 @@ void ConvOperator::forward() { CHECK_EQ(ins_[1]->value->getHeight(), batchSize); checkFilterSize(ins_[1]->value); Matrix::resizeOrCreate(out_->value, batchSize, - outputH_ * outputW_ * numFilters_); + outputH_ * outputW_ * numFilters_, false, useGpu_); { AsyncGpuBlock block; for (size_t batchId = 0; batchId < batchSize; ++batchId) { diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/gserver/layers/ConvexCombinationLayer.cpp index e092b2e390f37cd322db8bed8273f561fa979791..a81cf939af671f3fb34fb52ae33035a7bb524aed 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/gserver/layers/ConvexCombinationLayer.cpp @@ -21,18 +21,20 @@ limitations under the License. */ namespace paddle { /** - * @brief A layer for convex weighted average of vectors, + * @brief A layer for weighted sum of vectors, * which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND * TRANSLATE - * - Input: the first input contains the convex weights (batchSize x weightDim), - * and the shape of second input is (batchSize x (weightdim*dataDim)). - * - Output: the shape of output is (batchSize x dataDim). + * - Input: the size of the first input is weightDim, + * and the size of the second input is weightdim * dataDim. + * - Output: the size of the output is dataDim * \f[ - * out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)), - * i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1) + * out(j) = \sum_{i}(in0(i) * in1(i,j + i * dataDim)), + * i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1) * \f] + * Note that the above computation is for one sample. Multiple samples are + * processed in one batch. * - * The config file api is convex_comb_layer. + * The config file api is linear_comb_layer. 
*/ class ConvexCombinationLayer : public Layer { protected: diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp index b10bd1d886ecf42170914c619b7b4040d984501d..05a70aeff5e8ff3789bca966d351bffc8efb1cb3 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/gserver/layers/CosSimLayer.cpp @@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) { REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); MatrixPtr prevOut1 = getInputValue(0); MatrixPtr prevOut2 = getInputValue(1); - outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_); + outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale()); } } @@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) { outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0), *getInputValue(1), *getInputGrad(0), - *getInputGrad(1), kCosSimScale_); + *getInputGrad(1), config_.cos_scale()); } } diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h index 9b0e53335b2503513ce11a4ab19f2199acfee499..65eb807ab2e6f16aab5ef2a9b08d697868c743a3 100644 --- a/paddle/gserver/layers/CosSimLayer.h +++ b/paddle/gserver/layers/CosSimLayer.h @@ -36,7 +36,7 @@ namespace paddle { class CosSimLayer : public Layer { public: explicit CosSimLayer(const LayerConfig& config) - : Layer(config), kCosSimScale_(5.0f) {} + : Layer(config) {} ~CosSimLayer() {} @@ -44,8 +44,6 @@ public: void forward(PassType passType); void backward(const UpdateCallback& callback = nullptr); - - const real kCosSimScale_; }; } // namespace paddle diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index f353afabb3b7162783fef4f9093630fb826c86cb..0f99aee03200c3834c7c27343f41f77edc5a558e 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label, Matrix &cost) { if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); + tmpCpuInput_[i].resizeAndCopyFrom( + getInput(i), false, HPPL_STREAM_DEFAULT); } + hl_stream_synchronize(HPPL_STREAM_DEFAULT); } forwardImpIn(output, label, cost); } diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index cef8772fc254f98d676e6fb89042487315280c61..3c6d13b0bf92ea98eb5c3331a1fdff6b177529b6 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { create(tmpBiasGrad_, 1, channels_, &betaGrad); } - // because of the different api of cudnn v4 and v5. - if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad()) { - create(tmpWGrad_, 1, channels_, &gammaGrad); - } - if (biases_ && biases_->getWGrad()) { - create(tmpBiasGrad_, 1, channels_, &betaGrad); - } - } - hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, ioDesc_, inGrad, bnParamDesc_, gamma, gammaGrad, betaGrad, EPS, savedMean, savedInvVar); - // because of the different api of cudnn v4 and v5. 
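The CudnnBatchNormLayer hunk here raises the batch-norm guard from `CUDNN_VERSION >= 4000` to `>= 4007` and drops the v4/v5 gradient-accumulation workaround. A minimal sketch of the compile-time gate, assuming only a cuDNN header; the version-formula comment reflects how `cudnn.h` defines `CUDNN_VERSION`, and `batchNormSupported` is illustrative, not Paddle code:

```cpp
#include <cstdio>
#include <cudnn.h>

// cudnn.h defines CUDNN_VERSION as MAJOR*1000 + MINOR*100 + PATCHLEVEL, so
// the guard value 4007 corresponds to cuDNN release 4.0.7 specifically,
// not to any R4 build, which is presumably why 4000 was too permissive.
bool batchNormSupported() {
#if CUDNN_VERSION >= 4007
  return true;  // safe to compile the cudnnBatchNormalization* call sites
#else
  std::fprintf(stderr,
               "CudnnBatchNorm requires cuDNN >= 4.0.7, built with %d\n",
               CUDNN_VERSION);
  return false;
#endif
}
```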
- if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad() && biases_->getWGrad()) { - weight_->getWGrad()->add(*tmpWGrad_); - biases_->getWGrad()->add(*tmpBiasGrad_); - } - } - { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); biases_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/gserver/layers/PrintLayer.cpp b/paddle/gserver/layers/PrintLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..68fee69f44d0c2c144f6dde6fd8ff36bd96094f6 --- /dev/null +++ b/paddle/gserver/layers/PrintLayer.cpp @@ -0,0 +1,58 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" + +namespace paddle { + +class PrintLayer : public Layer { +public: + explicit PrintLayer(const LayerConfig& config) + : Layer(config) {} + void forward(PassType passType); + void backward(const UpdateCallback& callback) {} +}; + +void PrintLayer::forward(PassType passType) { + Layer::forward(passType); + for (size_t i = 0; i != inputLayers_.size(); ++i) { + const auto& argu = getInput(i); + const std::string& name = inputLayers_[i]->getName(); + if (argu.value) { + std::ostringstream os; + argu.value->print(os); + LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str(); + } + if (argu.ids) { + std::ostringstream os; + argu.ids->print(os, argu.ids->getSize()); + LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str(); + } + if (auto startPos = argu.sequenceStartPositions) { + std::ostringstream os; + startPos->getVector(false)->print(os, startPos->getSize()); + LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); + } + if (auto subStartPos = argu.subSequenceStartPositions) { + std::ostringstream os; + subStartPos->getVector(false)->print(os, subStartPos->getSize()); + LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n" + << os.str(); + } + } +} + +REGISTER_LAYER(print, PrintLayer); + +} // namespace paddle diff --git a/paddle/gserver/layers/SamplingIdLayer.cpp b/paddle/gserver/layers/SamplingIdLayer.cpp index 41c1461967ae1c0ff3c4b3a11e8f7405b58f6ab9..b39c9948b53118b51090059fc554e76f94316f81 100644 --- a/paddle/gserver/layers/SamplingIdLayer.cpp +++ b/paddle/gserver/layers/SamplingIdLayer.cpp @@ -52,8 +52,10 @@ public: Layer::forward(passType); if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); + tmpCpuInput_[i].resizeAndCopyFrom( + getInput(i), false, HPPL_STREAM_DEFAULT); } + hl_stream_synchronize(HPPL_STREAM_DEFAULT); forwardImp(tmpCpuInput_[0]); } else { forwardImp(getInput(0)); diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index f72011ae16cb3bac73e8acd5338bd7a179da329b..552a6c5b41c7f896c52b2132578b136200967573 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector& dataLayers, testLayer->forward(PASS_TEST); Argument out; 
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); if (batchOut.value) { size_t dim = batchOut.value->getWidth(); ASSERT_TRUE((bool)out.value); @@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector& dataLayers, testLayer->forward(PASS_TEST); Argument out; out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); if (batchOut.value) { size_t dim = batchOut.value->getWidth(); ASSERT_TRUE((bool)out.value); diff --git a/paddle/gserver/tests/Sequence/dummy.list b/paddle/gserver/tests/Sequence/dummy.list new file mode 100644 index 0000000000000000000000000000000000000000..0e52665e11298965df5738f69c5bcefcc8bab0f9 --- /dev/null +++ b/paddle/gserver/tests/Sequence/dummy.list @@ -0,0 +1 @@ +dummy_file_no_use diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py new file mode 100644 index 0000000000000000000000000000000000000000..85a83554c5c3045d144ee0250d2808237eccc9e0 --- /dev/null +++ b/paddle/gserver/tests/rnn_data_provider.py @@ -0,0 +1,35 @@ +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer.PyDataProvider2 import * + +data = [ + [[[1, 3, 2], [4, 5, 2]], 0], + [[[0, 2], [2, 5], [0, 1, 2]], 1], +] + +@provider(input_types=[integer_value_sub_sequence(10), + integer_value(2)]) +def process_subseq(settings, file_name): + for d in data: + yield d + +@provider(input_types=[integer_value_sequence(10), + integer_value(2)]) +def process_seq(settings, file_name): + for d in data: + seq = [] + for subseq in d[0]: + seq += subseq + yield seq, d[1] diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/gserver/tests/sequenceGen.py index e4727e472d446b48e6001968841bfc178e34ec0c..cb83d79d78cc677d5ffeb77f5693d08da2a51668 100644 --- a/paddle/gserver/tests/sequenceGen.py +++ b/paddle/gserver/tests/sequenceGen.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -#coding=utf-8 - # Copyright (c) 2016 Baidu, Inc. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/paddle/gserver/tests/sequence_nest_rnn.conf b/paddle/gserver/tests/sequence_nest_rnn.conf new file mode 100644 index 0000000000000000000000000000000000000000..62b8c5d072d7b42e46504defeff12f7e101384a0 --- /dev/null +++ b/paddle/gserver/tests/sequence_nest_rnn.conf @@ -0,0 +1,76 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +######################## data source ################################ +define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', + test_list=None, + module='rnn_data_provider', + obj='process_subseq') + + +settings(batch_size=2, learning_rate=0.01) +######################## network configure ################################ +dict_dim = 10 +word_dim = 8 +hidden_dim = 8 +label_dim = 3 + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer(input=data, size=word_dim) + +# This hierachical RNN is designed to be equivalent to the simple RNN in +# sequence_rnn.conf + +def outer_step(x): + outer_mem = memory(name="outer_rnn_state", size=hidden_dim) + def inner_step(y): + inner_mem = memory(name="inner_rnn_state", + size=hidden_dim, + boot_layer=outer_mem) + out = fc_layer(input=[y, inner_mem], + size=hidden_dim, + act=TanhActivation(), + bias_attr=True, + name="inner_rnn_state") + return out + + inner_rnn_output = recurrent_group( + step=inner_step, + name="inner", + input=x) + last = last_seq(input=inner_rnn_output, name="outer_rnn_state") + + # "return last" should also work. But currently RecurrentGradientMachine + # does not handle it correctly. Current implementation requires that + # all the out links are from sequences. However, it does not report error + # when the out links are not sequences. + return inner_rnn_output + +out = recurrent_group( + name="outer", + step=outer_step, + input=SubsequenceInput(emb)) + +rep = last_seq(input=out) +prob = fc_layer(size=label_dim, + input=rep, + act=SoftmaxActivation(), + bias_attr=True) + +outputs(classification_cost(input=prob, + label=data_layer(name="label", size=label_dim))) diff --git a/paddle/gserver/tests/sequence_rnn.conf b/paddle/gserver/tests/sequence_rnn.conf new file mode 100644 index 0000000000000000000000000000000000000000..3294c2c3fc431c9d07aad0ba4620ec97a435fd91 --- /dev/null +++ b/paddle/gserver/tests/sequence_rnn.conf @@ -0,0 +1,57 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers import * + +######################## data source ################################ +define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', + test_list=None, + module='rnn_data_provider', + obj='process_seq') + + +settings(batch_size=2, learning_rate=0.01) +######################## network configure ################################ +dict_dim = 10 +word_dim = 8 +hidden_dim = 8 +label_dim = 3 + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer(input=data, size=word_dim) + +def step(y): + mem = memory(name="rnn_state", size=hidden_dim) + out = fc_layer(input=[y, mem], + size=hidden_dim, + act=TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + +out = recurrent_group( + name="rnn", + step=step, + input=emb) + +rep = last_seq(input=out) +prob = fc_layer(size=label_dim, + input=rep, + act=SoftmaxActivation(), + bias_attr=True) + +outputs(classification_cost(input=prob, + label=data_layer(name="label", size=label_dim))) diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/gserver/tests/test_Evaluator.cpp index 8e857781468fed694dbd061d896263bf05303260..3a591a316b8bafccac9c59ff28e57b4e27f8377a 100644 --- a/paddle/gserver/tests/test_Evaluator.cpp +++ b/paddle/gserver/tests/test_Evaluator.cpp @@ -87,18 +87,31 @@ void testEvaluator(TestConfig testConf, string testEvaluatorName, return; } + ICpuGpuVectorPtr sequenceStartPositions; + if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA || + testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL) { + if (!sequenceStartPositions) { + generateSequenceStartPositions(batchSize, sequenceStartPositions); + } + data.sequenceStartPositions = sequenceStartPositions; + } + arguments.push_back(data); } Evaluator* testEvaluator = Evaluator::create(testConf.evaluatorConfig); double totalScore = 0.0; + testEvaluator->start(); totalScore += testEvaluator->evalImp(arguments); testEvaluator->updateSamplesNum(arguments); + testEvaluator->finish(); LOG(INFO) << *testEvaluator; double totalScore2 = 0.0; if (testConf.testAccumulate) { + testEvaluator->start(); totalScore2 += testEvaluator->evalImp(arguments); + testEvaluator->finish(); EXPECT_LE(fabs(totalScore - totalScore2), 1.0e-5); } } @@ -202,6 +215,15 @@ TEST(Evaluator, precision_recall) { false); } +TEST(Evaluator, ctc_error_evaluator) { + TestConfig config; + config.evaluatorConfig.set_type("ctc_edit_distance"); + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "output", 32}); + config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "label", 1}); + testEvaluatorAll(config, "ctc_error_evaluator", 100); +} + int main(int argc, char** argv) { initMain(argc, argv); FLAGS_thread_local_rand_use_global_seed = true; diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp index 35d6ee7f4a402d198dbcd1df7b272dcd65723659..b73fdd18abf35858a366552120e69c8a039a4726 100644 --- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp +++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp @@ -21,6 +21,8 @@ limitations under the License. */ #include #include +P_DECLARE_int32(seed); + using namespace paddle; // NOLINT using namespace std; // NOLINT class TrainerForTest : public paddle::Trainer { @@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost, CpuVector vecMomentum(dim); // vecW needs to be assigned, otherwise the variable is an uncertain value. 
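The CalCost change just below replaces all-zero weight initialization with seeded Gaussian initialization, so the two configurations under test start from identical, non-trivial parameters and their per-pass costs are directly comparable. A minimal standard-library sketch of that idea; `initWeights` is a hypothetical helper, not Paddle API:

```cpp
#include <random>
#include <vector>

// Deterministic Gaussian init: a fixed seed makes repeated runs start from
// identical weights, matching randnorm(0, 0.1) seeded from FLAGS_seed below.
std::vector<float> initWeights(size_t dim, unsigned seed) {
  std::mt19937 gen(seed);
  std::normal_distribution<float> dist(0.0f, 0.1f);  // mean 0, stddev 0.1
  std::vector<float> w(dim);
  for (auto& v : w) {
    v = dist(gen);
  }
  return w;
}
```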
- vecW.zeroMem(); + + *ThreadLocalRand::getSeed() = FLAGS_seed; + vecW.randnorm(0, 0.1); trainer.startTrain(); for (int i = 0; i < num_passes; ++i) { @@ -88,27 +92,39 @@ void CalCost(const string& conf, const string& dir, real* cost, rmDir(dir.c_str()); } -TEST(RecurrentGradientMachine, HasSubSequence) { +void test(const string& conf1, const string& conf2, double eps) { int num_passes = 5; real* cost1 = new real[num_passes]; - const string conf1 = "gserver/tests/sequence_layer_group.conf"; const string dir1 = "gserver/tests/t1"; CalCost(conf1, dir1, cost1, num_passes); real* cost2 = new real[num_passes]; - const string conf2 = "gserver/tests/sequence_nest_layer_group.conf"; const string dir2 = "gserver/tests/t2"; CalCost(conf2, dir2, cost2, num_passes); for (int i = 0; i < num_passes; i++) { LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i] - << ", cost2=" << cost2[i]; - ASSERT_NEAR(cost1[i], cost2[i], 1e-3); + << ", cost2=" << cost2[i] + << ", diff=" << std::abs(cost1[i] - cost2[i]); + ASSERT_NEAR(cost1[i], cost2[i], eps); } delete[] cost1; delete[] cost2; } +TEST(RecurrentGradientMachine, HasSubSequence) { + test("gserver/tests/sequence_layer_group.conf", + "gserver/tests/sequence_nest_layer_group.conf", + 1e-5); +} + +TEST(RecurrentGradientMachine, rnn) { + test("gserver/tests/sequence_rnn.conf", + "gserver/tests/sequence_nest_rnn.conf", + 0); +} + + int main(int argc, char** argv) { if (paddle::version::isWithPyDataProvider()) { if (!paddle::version::isWithGpu()) { diff --git a/paddle/gserver/tests/test_RecurrentLayer.cpp b/paddle/gserver/tests/test_RecurrentLayer.cpp index 2cea190b859496cd635fc5a8d1834779537d50e6..9b933b153d158bef565c0964232525ba99b8b3d4 100644 --- a/paddle/gserver/tests/test_RecurrentLayer.cpp +++ b/paddle/gserver/tests/test_RecurrentLayer.cpp @@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize, Argument& cpuInput = testCpu.dataLayer_->getOutput(); Argument& gpuInput = testGpu.dataLayer_->getOutput(); gpuInput.resizeAndCopyFrom(cpuInput, true); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE); const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index f3a6503d4a21ff8766f3289f8eee992d4d13045d..1b7f9ac5dac16c167dcc22930c28bc3521162b9b 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width, if (!matrix) { matrix = Matrix::create(height, width, trans, useGpu); } else { + CHECK_EQ(matrix->useGpu(), useGpu); matrix->resize(height, width); } } @@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height, } else { CHECK(dynamic_cast(matrix.get()) || dynamic_cast(matrix.get())); + CHECK_EQ(matrix->useGpu(), useGpu); matrix->resize(height, width, nnz, valueType, format); } } diff --git a/paddle/math/Vector.cpp b/paddle/math/Vector.cpp index b1a459b86aa4ff70e4e07267c8a902123f9d17c0..7553ea25e09d2f52f1f8b9205f954510b77cbfa9 100644 --- a/paddle/math/Vector.cpp +++ b/paddle/math/Vector.cpp @@ -800,6 +800,7 @@ void CpuGpuVectorT::resizeOrCreate(size_t size, bool useGpu) { } else if ((!useGpu) && (!cpuVectorT_)) { cpuVectorT_ = VectorT::create(size, false); } else { + CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_)); this->resize(size, useGpu); } } diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 
index 8610a66452358e1b2e2a846ddfcf62a0ce99e22e..0ca56b29b39b317d01d80631e332ba02356a613d 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
   if (!dest) {
     dest = src->clone(0, 0, useGpu);
   } else {
+    CHECK_EQ(dest->useGpu(), useGpu);
     dest->resize(src->getHeight(), src->getWidth());
   }
   dest->copyFrom(*src, stream);
@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
                           hl_stream_t stream = HPPL_STREAM_DEFAULT) {
   if (src) {
     CHECK_LE((size_t)startRow + copySize, src->getHeight());
-
     int height = copySize;
     int width = src->getWidth();
     if (!dest) {
       dest = src->clone(height, width, useGpu);
     } else {
+      CHECK_EQ(dest->useGpu(), useGpu);
       dest->resize(height, width);
     }
     MatrixPtr submat = src->subMatrix(startRow, copySize);
@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
   }
 }
 
+void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
+  resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+}
+
 void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
                                  hl_stream_t stream) {
   dataId = src.dataId;
@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
   resizeAndCopy(strs, src.strs, useGpu, stream);
 }
 
+int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
+                                    int32_t copySize, bool useGpu) {
+  int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
+                                   HPPL_STREAM_DEFAULT);
+  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  return size;
+}
+
 int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
                                     int32_t copySize, bool useGpu,
                                     hl_stream_t stream) {
@@ -463,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
   }
 }
 
-void Argument::getSeqLengthAndStart(
-    std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
-    int* maxSequenceLength) const {
+void Argument::getSeqInfo(std::vector<SeqInfo>* seqInfo) const {
   const int* starts = sequenceStartPositions->getData(false);
-  if (hasSubseq()) {
-    size_t numSubSequences = getNumSubSequences();
-    (*seqLengthAndStart).reserve(numSubSequences);
-    const int* subStarts = subSequenceStartPositions->getData(false);
-    int seqIndex = 0;
-    int subSeqIndex = 0;
-    *maxSequenceLength = 0;
-    for (size_t i = 0; i < numSubSequences; ++i) {
-      if (subStarts[i] == starts[seqIndex]) {
-        subSeqIndex = 0;
-        (*seqLengthAndStart)
-            .push_back(std::make_tuple(
-                subStarts[i + 1] - subStarts[i], (int)subStarts[i],
-                (int)seqIndex, (int)subSeqIndex));
-        ++subSeqIndex;
-        ++seqIndex;
-      } else if (subStarts[i] < starts[seqIndex]) {
-        (*seqLengthAndStart)
-            .push_back(std::make_tuple(
-                subStarts[i + 1] - subStarts[i], (int)subStarts[i],
-                (int)seqIndex - 1, (int)subSeqIndex));
-        ++subSeqIndex;
+  const int* subStarts = hasSubseq()
+      ? subSequenceStartPositions->getData(false) : nullptr;
+  size_t numSequences = getNumSequences();
+  seqInfo->reserve(numSequences);
+  int subSeqEnd = 0;
+  for (size_t i = 0; i < numSequences; ++i) {
+    SeqInfo info;
+    info.seqStart = starts[i];
+    info.subLevelLength = starts[i + 1] - starts[i];
+    info.seqId = i;
+    if (hasSubseq()) {
+      info.subSeqStart = subSeqEnd;
+      while (subStarts[subSeqEnd] < starts[i + 1]) {
+        ++subSeqEnd;
       }
-      // maxSequenceLength_ = 1 + max(subSeqIndex) in each Seq.
-      if (*maxSequenceLength < std::get<3>((*seqLengthAndStart)[i]))
-        *maxSequenceLength = std::get<3>((*seqLengthAndStart)[i]);
-    }
-    *maxSequenceLength += 1;
-  } else {
-    size_t numSequences = getNumSequences();
-    (*seqLengthAndStart).reserve(numSequences);
-    for (size_t i = 0; i < numSequences; ++i) {
-      (*seqLengthAndStart)
-          .push_back(std::make_tuple(
-              starts[i + 1] - starts[i], (int)starts[i], (int)i, (int)i));
+      info.topLevelLength = subSeqEnd - info.subSeqStart;
+    } else {
+      info.topLevelLength = info.subLevelLength;
+      info.subSeqStart = 0;  // not used
     }
-    std::sort((*seqLengthAndStart).begin(), (*seqLengthAndStart).end(),
-              std::greater<std::tuple<int, int, int, int>>());
-
-    *maxSequenceLength = std::get<0>((*seqLengthAndStart)[0]);
+    seqInfo->push_back(info);
   }
+  std::sort(seqInfo->begin(), seqInfo->end(),
+            [](const SeqInfo& a, const SeqInfo& b) {
+              return a.topLevelLength > b.topLevelLength;
+            });
 }
 
 void Argument::checkSubset() const {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index c444ebaf12930e938a3a4d75541d0fbf5bbb01ac..81cd117fc45cfa34da0810b01c5a710d9ce5950b 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -203,13 +203,28 @@ struct Argument {
    * startSeq: the sample id of start
    * copySize: how many samples need to copy
    * return value: how many samples are copied
+   * Note: when a stream is specified explicitly here, the caller must
+   * also synchronize that stream after this function returns
    */
   int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
-                            int32_t copySize, bool useGpu = FLAGS_use_gpu,
-                            hl_stream_t stream = HPPL_STREAM_DEFAULT);
+                            int32_t copySize, bool useGpu, hl_stream_t stream);
 
-  void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu,
-                         hl_stream_t stream = HPPL_STREAM_DEFAULT);
+  /*
+   * Same as the function above, except that the stream is
+   * HPPL_STREAM_DEFAULT and synchronization is called automatically
+   * inside it
+   */
+  int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
+                            int32_t copySize, bool useGpu = FLAGS_use_gpu);
+
+  void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
+
+  /*
+   * Same as the function above, except that the stream is
+   * HPPL_STREAM_DEFAULT and synchronization is called automatically
+   * inside it
+   */
+  void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
 
   /*
     @brief Concatenate several arguments into one and put the result into it.
@@ -238,12 +253,29 @@ struct Argument {
   static void splitByDataId(const std::vector<Argument>& argus,
                             std::vector<std::vector<Argument>>* arguGroups);
 
+  struct SeqInfo {
+    // Equal to sequence length for sequence data
+    // Equal to number of subsequences for subsequence data
+    int topLevelLength;
+
+    int seqStart;
+    int seqId;
+
+    // Equal to topLevelLength for sequence data
+    // Equal to sum of the length of subsequences for subsequence data
+    int subLevelLength;
+
+    // Only used for subsequence data: the start position of this sequence
+    // in subSequenceStartPositions, i.e.
+    // subSequenceStartPositions[subSeqStart] == seqStart
+    int subSeqStart;
+  };
   /*
-    Get Sequence Length, startPositions and max Length according to input
-   */
-  void getSeqLengthAndStart(
-      std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
-      int* maxSequenceLength) const;
+    Get SeqInfo for each sequence of this argument.
+    Elements in *seqInfo are sorted by topLevelLength in descending order
+  */
+  void getSeqInfo(std::vector<SeqInfo>* seqInfo) const;
+
   /*
     Check whether sequenceStartPositions is subset of subSequenceStartPositions.
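For reference while reviewing the getSeqInfo() rewrite above, here is a rough Python sketch of the same bookkeeping. It is illustrative only and not part of the patch; the dict keys mirror the SeqInfo fields, and `get_seq_info`, `starts` and `sub_starts` are hypothetical names standing in for the C++ entities.

.. code-block:: python

    def get_seq_info(starts, sub_starts=None):
        # starts / sub_starts play the role of (sub)sequenceStartPositions.
        seq_info = []
        sub_seq_end = 0
        for i in range(len(starts) - 1):
            info = {'seqStart': starts[i],
                    'subLevelLength': starts[i + 1] - starts[i],
                    'seqId': i,
                    'subSeqStart': 0}
            if sub_starts is not None:
                # count the subsequences that fall inside sequence i
                info['subSeqStart'] = sub_seq_end
                while sub_starts[sub_seq_end] < starts[i + 1]:
                    sub_seq_end += 1
                info['topLevelLength'] = sub_seq_end - info['subSeqStart']
            else:
                info['topLevelLength'] = info['subLevelLength']
            seq_info.append(info)
        # sorted by topLevelLength in descending order, like the std::sort above
        return sorted(seq_info, key=lambda s: s['topLevelLength'], reverse=True)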
diff --git a/paddle/scripts/travis/build.sh b/paddle/scripts/travis/build_and_test.sh
similarity index 60%
rename from paddle/scripts/travis/build.sh
rename to paddle/scripts/travis/build_and_test.sh
index a644f2a4164f870dc88af9b8f357f5a3fb306d7d..3ea633be327027cc2093ad3a68158af1cfb097e7 100755
--- a/paddle/scripts/travis/build.sh
+++ b/paddle/scripts/travis/build_and_test.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
-cd `dirname $0`
 source ./common.sh
 cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON
 make -j `nproc`
+env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j `nproc`"
+sudo make install
+sudo paddle version
diff --git a/paddle/scripts/travis/deploy_key.enc b/paddle/scripts/travis/deploy_key.enc
new file mode 100644
index 0000000000000000000000000000000000000000..b0aa45c5ac626c735735fd8541a43bf8b099d0a0
Binary files /dev/null and b/paddle/scripts/travis/deploy_key.enc differ
diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c2a4809d75b97a9d8d8b83cf197e90bd62b48603
--- /dev/null
+++ b/paddle/scripts/travis/docs.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# common.sh applies `set -e` and enters the build directory.
+source ./common.sh
+
+# Build the documentation only.
+cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
+make paddle_docs paddle_docs_cn
+
+# Derive the SSH repository URL from the remote origin.
+REPO=`git config remote.origin.url`
+SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:}
+SHA=`git rev-parse --verify HEAD`
+
+# Documentation branch name.
+# The gh-pages branch is used for PaddlePaddle.org. The English
+# documentation lives in the `doc` directory, and the Chinese version
+# lives in `doc_cn`.
+TARGET_BRANCH="gh-pages"
+
+# Only the master branch is deployed, so the site tracks the latest documentation.
+SOURCE_BRANCH="master"
+
+# Skip deployment for pull requests and for branches other than master.
+if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
+  exit 0
+fi
+
+# Clone the repo into the output directory.
+git clone $REPO output
+cd output
+
+# Check out the GitHub Pages branch, creating it if it does not exist yet.
+git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
+
+# Remove the old docs and move the freshly built ones into place.
+rm -rf doc doc_cn
+mv ../doc_cn/html doc_cn
+mv ../doc/html doc
+
+# Check whether anything has changed.
+set +e
+git diff --exit-code >/dev/null
+if [ $? -eq 0 ]; then
+  echo "No changes to the output on this push; exiting."
+  exit 0
+fi
+set -e
+
+# Commit
+git add .
+git config user.name "Travis CI" +git config user.email "paddle-dev@baidu.com" +git commit -m "Deploy to GitHub Pages: ${SHA}" + +# Set ssh private key +openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d +chmod 600 deploy_key +eval `ssh-agent -s` +ssh-add deploy_key + +# Push +git push $SSH_REPO $TARGET_BRANCH diff --git a/paddle/scripts/travis/main.sh b/paddle/scripts/travis/main.sh new file mode 100755 index 0000000000000000000000000000000000000000..c49d4546c24ac9304cd6f3c5940ed3d1d32ebb3d --- /dev/null +++ b/paddle/scripts/travis/main.sh @@ -0,0 +1,11 @@ +#!/bin/bash +cd `dirname $0` + +if [ ${JOB} == "BUILD_AND_TEST" ]; then + ./build_and_test.sh +elif [ ${JOB} == "DOCS" ]; then + ./docs.sh +else + echo Unknown job ${JOB} + exit 1 +fi diff --git a/paddle/scripts/travis/make_install.sh b/paddle/scripts/travis/make_install.sh deleted file mode 100755 index 08b2a648bb97de2c4f39c64efb9a41829faae0be..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/make_install.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -cd `dirname $0` -source ./common.sh -sudo make install -sudo paddle version diff --git a/paddle/scripts/travis/unittest.sh b/paddle/scripts/travis/unittest.sh deleted file mode 100755 index 45e8c85c1028efb98433ebc383931def30fae416..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/unittest.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -cd `dirname $0` -source ./common.sh -env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j `nproc`" - diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4 index d04620d363c14923455d68734b03ef9bb3f28f78..a2b243a7869eaff120b25ece35e95be4d4284d18 100644 --- a/proto/ModelConfig.proto.m4 +++ b/proto/ModelConfig.proto.m4 @@ -452,6 +452,9 @@ message SubModelConfig { repeated LinkConfig out_links = 10; optional GeneratorConfig generator = 11; + + // the id of inlink which share info with outlinks, used in recurrent layer group + optional int32 target_inlinkid = 12; } message ModelConfig { diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 68cc40247041093d3eec6dc93b22d358f4cbbaa1..fd9a003bb018c87fb8e8e2992390f27edfd72f4b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -22,6 +22,8 @@ find_python_module(pip REQUIRED) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) +add_subdirectory(paddle/trainer_config_helpers/tests) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ DESTINATION opt/paddle/share/wheels ) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b26a63e7f3c1d2fdfc5fea0a034a2f2c5238d1f0..f2f67f9bd66a4ebab9b5ace7fb13a194959d6c10 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None): @config_func def RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, - seq_reversed=False): + seq_reversed=False, + target_inlinkname=""): global g_current_submodel config_assert(g_config.model_config.type == "recurrent_nn", "RecurrentLayerGroup should be used only in recurrent_nn") @@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, SubModelBegin(name) g_current_submodel.is_recurrent_layer_group = True g_current_submodel.reversed = seq_reversed + g_current_submodel.target_inlinkid = -1 in_links_count = 0 - for link in in_links: + for linkid, link in enumerate(in_links): if isinstance(link, basestring): name = link 
has_subseq = False else: name = link.link_name has_subseq = link.has_subseq + # assign target_inlinkid according to target_inlinkname + if target_inlinkname == name: + g_current_submodel.target_inlinkid = linkid + if in_links_count == 0: in_links_has_subseq = has_subseq else: @@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, SequenceScatterAgentLayer(name=name, size=layer.size) else: ScatterAgentLayer(name=name, size=layer.size) + pair = g_current_submodel.in_links.add() pair.layer_name = layer_name pair.link_name = MakeLayerNameInSubmodel(name) @@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name, in_links, out_links, generator=None, + target_inlinkname="", seq_reversed=False): RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, - seq_reversed) + seq_reversed, + target_inlinkname) for link in out_links: RecurrentLayerGroupSetOutLink(link) @@ -1399,6 +1408,14 @@ class SelectiveFCLayer(LayerBase): input_index, psize, dims, sparse, format) self.create_bias_parameter(bias, self.config.size) +@config_layer('print') +class PrintLayer(LayerBase): + def __init__( + self, + name, + inputs): + super(PrintLayer, self).__init__(name, 'print', 0, inputs) + @config_layer('data') class DataLayer(LayerBase): def __init__( @@ -1614,7 +1631,7 @@ class BatchNormLayer(LayerBase): # Also based on cudnn version. use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \ ((not parallel_nn) or self.config.device > -1) and \ - cudnn_version >= 4000 + cudnn_version >= 4007 self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" super(BatchNormLayer, self).__init__(name, self.layer_type, 0, active_type=active_type, @@ -2264,6 +2281,9 @@ class ConvexCombinationLayer(LayerBase): name, 'convex_comb', size, inputs=inputs, device=device) config_assert(len(self.inputs) == 2, 'ConvexCombinationLayer must have 2 inputs') + config_assert( + size * self.get_input_layer(0).size == self.get_input_layer(1).size, + 'Wrong input size for ConvexCombinationLayer') self.set_layer_size(size) @config_layer('interpolation') @@ -2313,6 +2333,9 @@ class CosSimVecMatLayer(LayerBase): self.config.cos_scale = cos_scale config_assert(len(self.inputs) == 2, 'CosSimVecMatLayer must have 2 inputs') + config_assert( + size * self.get_input_layer(0).size == self.get_input_layer(1).size, + 'Wrong input size for CosSimVecMatLayer') @config_layer('sampling_id') class SamplingIdLayer(LayerBase): @@ -2361,6 +2384,7 @@ class CosSimLayer(LayerBase): self, name, inputs, + cos_scale=5, device=None): super(CosSimLayer, self).__init__( name, 'cos', 1, inputs=inputs, device=device) @@ -2368,6 +2392,7 @@ class CosSimLayer(LayerBase): config_assert( self.get_input_layer(0).size == self.get_input_layer(1).size, 'inputs of CosSimLayer must have same dim') + self.config.cos_scale = cos_scale @config_layer('tensor') diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 956bedadd75e5f389c25c37c1a466a0c3cb97430..985fae9f955c950d861d4f1f2f98845562fb6fc9 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -94,7 +94,7 @@ def evaluator_base( Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval: classification_error_evaluator=0.4486 CurrentEval: ErrorRate=0.3964 - + :param input: Input layers, a object of LayerOutput or a list of LayerOutput. 
    :type input: list|LayerOutput
@@ -296,6 +296,7 @@ def precision_recall_evaluator(
 @wrap_name_default()
 def ctc_error_evaluator(
         input,
+        label,
         name=None,
         ):
     """
@@ -305,16 +306,20 @@
 
     .. code-block:: python
 
-       eval = ctc_error_evaluator(input)
+       eval = ctc_error_evaluator(input=input, label=lbl)
 
     :param name: Evaluator name.
     :type name: None|basestring
-    :param input: Input Layer.
+    :param input: Input Layer. Should be the same as the input for ctc_layer.
     :type input: LayerOutput
+    :param label: The input label, which is a data_layer. Should be the same
+        as the label used for the corresponding ctc_layer.
+    :type label: LayerOutput
     """
     evaluator_base(name=name,
                    type="ctc_edit_distance",
-                   input=input)
+                   input=input,
+                   label=label)
 
 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
 @wrap_name_default()
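With the new `label` argument, the evaluator is wired to the same `input` and `label` layers as the ctc_layer it accompanies. A minimal configuration sketch follows; `hidden` and `num_classes` are hypothetical placeholders, and the pattern mirrors `layers_test_config.py` at the end of this patch:

.. code-block:: python

    num_classes = 5
    # +1 for the CTC 'blank' label (see the ctc_layer note later in this patch)
    output = fc_layer(input=hidden, size=num_classes + 1,
                      act=SoftmaxActivation())
    lbl = data_layer(name="label", size=num_classes)
    ctc = ctc_layer(input=output, label=lbl, size=num_classes + 1)
    eval = ctc_error_evaluator(input=output, label=lbl)  # same input and label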
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b7e5f566bb8c39fa6ea9ed491f28fa046bba71ee..bda0b4f5d60e82c1d577b0063fd5e164bf6117c3 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -21,7 +21,6 @@ from .evaluators import *
 from .poolings import MaxPooling, AvgPooling, BasePoolingType
 from .attrs import *
 from .default_decorators import *
-
 try:
     import cPickle as pickle
 except ImportError:
@@ -47,11 +46,12 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer',
            'tensor_layer', 'selective_fc_layer', 'sampling_id_layer',
            'slope_intercept_layer', 'trans_full_matrix_projection',
+           'linear_comb_layer',
            'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
            'cross_entropy_with_selfnorm', 'cross_entropy',
            'multi_binary_label_cross_entropy',
            'rank_cost', 'lambda_cost', 'huber_cost',
-           'block_expand_layer',
+           'block_expand_layer', 'out_prod_layer', 'print_layer'
           ]
@@ -70,7 +70,8 @@ class LayerType(object):
     POOLING_AVG = 'average'
     FC_LAYER = "fc"
     COST = 'cost'
-    COSINE_SIM = 'cos_vm'
+    COSINE_SIM_VEC = 'cos_vm'
+    COSINE_SIM = 'cos'
     HSIGMOID = 'hsigmoid'
     CONV_LAYER = "conv"
     POOL_LAYER = "pool"
@@ -91,6 +92,7 @@ class LayerType(object):
     POWER_LAYER = 'power'
     SCALING_LAYER = 'scaling'
     TRANS_LAYER = 'trans'
+    OUT_PROD_LAYER = 'out_prod'
 
     MEMORY = 'memory'
     MAXID_LAYER = 'maxid'
@@ -102,9 +104,11 @@ class LayerType(object):
     SEL_FC_LAYER = "selective_fc"
     SAMPLING_ID_LAYER = "sampling_id"
     SLOPE_INTERCEPT_LAYER = "slope_intercept"
-    CONVEX_COMBINATION_LAYER = "convex_comb"
+    LINEAR_COMBINATION_LAYER = "convex_comb"
     BLOCK_EXPAND = "blockexpand"
 
+    PRINT_LAYER = "print"
+
     CTC_LAYER = "ctc"
     CRF_LAYER = "crf"
     CRF_DECODING_LAYER = "crf_decoding"
@@ -171,6 +175,8 @@ class LayerOutput(object):
         assert LayerType.is_layer_type(layer_type)
         self.name = name
         self.layer_type = layer_type
+        if parents is not None and type(parents) != list:
+            parents = [parents]
         self.parents = [] if parents is None else parents
         self.activation = activation
         self.num_filters = num_filters
@@ -197,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
 
 
+def check_input(input):
+    """
+    Check that input is a LayerOutput, or a list/tuple of LayerOutput.
+    A single LayerOutput is wrapped into a one-element list.
+
+    :param input: The input layer. Could be a list/tuple of input layer.
+    :type input: LayerOutput|list|tuple
+    :return: list of LayerOutput
+    :rtype: list of LayerOutput
+    """
+
+    if isinstance(input, LayerOutput):
+        return [input]
+    assert isinstance(input, list)
+    for inp in input:
+        assert isinstance(inp, LayerOutput)
+    return list(input)
+
+
 def layer_support(*attrs):
     def decorator(method):
         @functools.wraps(method)
@@ -512,7 +537,7 @@ class MixedLayerType(LayerOutput):
         :rtype: MixedLayerType
         """
         if not self.finalized:
-            assert isinstance(other, Projection)
+            assert isinstance(other, Projection) or isinstance(other, Operator)
             self.inputs.append(other)
             self.parents.append(other.origin)
         return self
@@ -725,6 +750,27 @@ def fc_layer(input, size, act=None, name=None,
           size=size)
 
 
+@wrap_name_default("print")
+def print_layer(input, name=None):
+    """
+    Print the output value of the input layers. Useful for debugging.
+
+    :param name: The layer name.
+    :type name: basestring
+    :param input: The input layer. Could be a list/tuple of input layer.
+    :type input: LayerOutput|list|tuple
+    :return: No return value.
+    """
+    input = check_input(input)  # normalize a lone LayerOutput into a list
+
+    Layer(
+        name=name,
+        type=LayerType.PRINT_LAYER,
+        inputs=[l.name for l in input],
+    )
+    LayerOutput(name, LayerType.PRINT_LAYER, input)
+
+
 @wrap_name_default("seq_pooling")
 @wrap_bias_attr_default(has_bias=False)
 @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
@@ -1169,13 +1215,16 @@ def power_layer(input, weight, name=None, layer_attr=None):
 
 @layer_support()
 def scaling_layer(input, weight, name=None, layer_attr=None):
     """
-    A layer for each row of a matrix, multiplying with a element of a vector.
+    A layer that multiplies the input vector by a weight scalar.
 
     .. math::
-       y.row[i] = w[i] * x.row[i]
+       y = w x
+
+    where :math:`x` is a size=dataDim input, :math:`w` is a size=1 weight,
+    and :math:`y` is a size=dataDim output.
 
-    where :math:`x` is (batchSize x dataDim) input, :math:`w` is
-    (batchSize x 1) weight vector, and :math:`y` is (batchSize x dataDim) output.
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.
 
     The example usage is:
@@ -1249,11 +1298,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
 
     .. math::
        similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b}
-                    \\over \\|\\mathbf{b}\\| \\|\\mathbf{b}\\|}
+                    \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|}
 
-    And the input dimension is :math:`a \in R^M`, :math:`b \in R^{MN}`. The
-    similarity will be calculated N times by step M. The output dimension is
-    :math:`R^N`. The scale will be multiplied to similarity.
+    The size of :math:`a` is M, and the size of :math:`b` is M*N. The
+    similarity is computed N times, once for each M-wide chunk of b. The
+    output size is N, and each output value is multiplied by scale.
+
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.
 
     :param name: layer name
     :type name: basestring
@@ -1270,14 +1322,23 @@
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    Layer(
-        name=name,
-        type=LayerType.COSINE_SIM,
-        size=size,
-        cos_scale=scale,
-        inputs=[a.name, b.name],
-        **ExtraLayerAttribute.to_kwargs(layer_attr)
-    )
+    if size == 1:
+        Layer(
+            name=name,
+            type=LayerType.COSINE_SIM,
+            cos_scale=scale,
+            inputs=[a.name, b.name],
+            **ExtraLayerAttribute.to_kwargs(layer_attr)
+        )
+    else:
+        Layer(
+            name=name,
+            type=LayerType.COSINE_SIM_VEC,
+            size=size,
+            cos_scale=scale,
+            inputs=[a.name, b.name],
+            **ExtraLayerAttribute.to_kwargs(layer_attr)
+        )
     return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b])
 
 @wrap_name_default()
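The reworked cos_sim now dispatches on size: the default size=1 emits layer type 'cos', while size=N falls back to the vector-matrix form 'cos_vm'. An illustrative sketch, with `vec_a`, `vec_b` and `mat_b` standing in for real layers:

.. code-block:: python

    # size == 1 (default): both inputs must have the same width; output size 1.
    sim = cos_sim(a=vec_a, b=vec_b)
    # size == N: b is treated as N chunks of a's width, so the parser checks
    # size * size(a) == size(b); the output width is N.
    sims = cos_sim(a=vec_a, b=mat_b, size=3)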
@@ -2326,6 +2387,39 @@ def maxid_layer(input, name=None, layer_attr=None):
                        layer_type=LayerType.MAXID_LAYER,
                        parents=[input])
 
+@wrap_name_default()
+def out_prod_layer(input1, input2, name=None, layer_attr=None):
+    """
+    A layer for computing the outer product of two vectors.
+    The result is a matrix of size(input1) x size(input2).
+
+    The example usage is:
+
+    .. code-block:: python
+
+       out_prod = out_prod_layer(input1=vec1, input2=vec2)
+
+    :param name: Layer name.
+    :type name: basestring
+    :param input1: The first input layer.
+    :type input1: LayerOutput
+    :param input2: The second input layer.
+    :type input2: LayerOutput
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    assert isinstance(input1, LayerOutput)
+    assert isinstance(input2, LayerOutput)
+    Layer(name=name,
+          type="out_prod",
+          inputs=[input1.name, input2.name],
+          **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(name=name,
+                       layer_type=LayerType.OUT_PROD_LAYER,
+                       parents=[input1, input2])
 
 @wrap_name_default()
 def eos_layer(input, eos_id, name=None, layer_attr=None):
@@ -2909,29 +3003,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0):
 
 @wrap_name_default()
-def convex_comb_layer(input, size, name=None):
+def linear_comb_layer(weights, vectors, size, name=None):
     """
-    A layer for convex weighted average of vectors takes two inputs.
-      - Input: a vector containing the convex weights (batchSize x weightdim),
-               and a matrix in a vector form (batchSize x (weightdim * datadim)).
-      - Output: a vector (batchSize * datadim).
+    A layer for the weighted sum of vectors. It takes two inputs.
+      - Input: the size of the weights is M,
+               the size of the vectors is M*N
+      - Output: a vector of size=N
 
     .. math::
 
-       y[i][j] = \sum_{j}(x_{1}(i, j) * x_{2}(i,j + i * dataDim)),
+       z(i) = \sum_{j=0}^{M-1} x(j) y(i + N j)
+
+       where :math:`0 \le i \le N-1`
+
+    Or in matrix notation:
 
-       i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1)
+    .. math::
+
+       z = x^\mathrm{T} Y
 
     In this formula:
-      - :math:`x_{1}`: the first input.
-      - :math:`x_{2}`: the second input.
-      - :math:`y`: the output.
+      - :math:`x`: the weights.
+      - :math:`y`: the vectors.
+      - :math:`z`: the output.
+
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.
 
     The simple usage is:
 
     .. code-block:: python
 
-       convex_comb = convex_comb_layer(input=inputs,
+       linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
                                        size=elem_dim)
 
     :param input: The input layers.
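As a quick sanity check of the formula above, the weighted sum z = x^T Y can be reproduced in plain NumPy; this is purely illustrative and independent of Paddle:

.. code-block:: python

    import numpy as np

    M, N = 2, 3
    x = np.array([0.5, 2.0])               # weights, size M
    y = np.arange(M * N, dtype=float)      # vectors, size M*N
    # z(i) = sum_j x(j) * y(i + N*j), i.e. x^T Y with Y = y.reshape(M, N)
    z = x.dot(y.reshape(M, N))
    assert z.shape == (N,)
    assert np.allclose(z, [6.0, 8.5, 11.0])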
@@ -2944,15 +3046,16 @@
     :rtype: LayerOutput
     """
-    assert isinstance(input, list) or isinstance(input, tuple)
-    assert len(input) == 2
     Layer(
         name=name,
-        type=LayerType.CONVEX_COMBINATION_LAYER,
+        type=LayerType.LINEAR_COMBINATION_LAYER,
         size=size,
-        inputs=[Input(input[0].name), Input(input[1].name)],
+        inputs=[Input(weights.name), Input(vectors.name)],
     )
-    return LayerOutput(name, LayerType.CONVEX_COMBINATION_LAYER, input, size=size)
+    return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER,
+                       [weights, vectors], size=size)
+
+convex_comb_layer = linear_comb_layer  # backward-compatible alias
 
 @wrap_name_default()
 def block_expand_layer(input,
@@ -3036,6 +3139,17 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
     classification task. That is, for sequence labeling problems where the
     alignment between the inputs and the target labels is unknown.
 
+    More details can be found by referring to `Connectionist Temporal
+    Classification: Labelling Unsegmented Sequence Data with Recurrent
+    Neural Networks `_
+
+    Note:
+        CTC needs an extra 'blank' label, so you need to use (num_classes + 1)
+        as the input size, where num_classes is the number of real categories
+        and 'blank' takes the last index. Hence the size of the 'input' layer
+        (e.g. an fc_layer with softmax activation) should be num_classes + 1,
+        and the size of ctc_layer should also be num_classes + 1.
+
     The simple usage:
 
     .. code-block:: python
@@ -3049,7 +3163,7 @@
     :type input: LayerOutput
     :param label: The data layer of label with variable length.
     :type label: LayerOutput
-    :param size: category numbers.
+    :param size: the number of categories + 1 (including the 'blank' label).
     :type size: int
     :param name: The name of this layer, which can be omitted.
     :type name: string|None
diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..611fb855a8c9ad6679167105dd737c995b23c209
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt
@@ -0,0 +1,5 @@
+#################### test_config_parser #########################
+add_test(NAME layers_test
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
+  WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b55667354750066a7d3ab3a0af59eb9e7d47d86
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/layers_test.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from paddle.trainer.config_parser import parse_config_and_serialize + +if __name__ == '__main__': + parse_config_and_serialize( + 'trainer_config_helpers/tests/layers_test_config.py', '') diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..39c85c788eecad5c6bba6dbd2f2734725fa4fff6 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py @@ -0,0 +1,56 @@ +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +num_classes = 5 + +x = data_layer(name="input1", size=3) +y = data_layer(name="input2", size=5) + +z = out_prod_layer(input1=x, input2=y) + +x1 = fc_layer(input=x, size=5) +y1 = fc_layer(input=y, size=5) +y2 = fc_layer(input=y, size=15) + +cos1 = cos_sim(a=x1, b=y1) +cos3 = cos_sim(a=x1, b=y2, size=3) + +linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) + +out = fc_layer(input=[cos1, cos3, linear_comb, z], + size=num_classes, + act=SoftmaxActivation()) + +print_layer(input=[out]) + +outputs(classification_cost(out, data_layer(name="label", size=num_classes))) + +# for ctc +tmp = fc_layer(input=x1, + size=num_classes + 1, + act=SoftmaxActivation()) +ctc = ctc_layer(input=tmp, + label=y, + size=num_classes + 1) +ctc_eval = ctc_error_evaluator(input=tmp, label=y) + +settings( + batch_size=10, + learning_rate=2e-3, + learning_method=AdamOptimizer(), + regularization=L2Regularization(8e-4), + gradient_clipping_threshold=25 +)
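A closing note on print_layer, which this test config exercises: assuming the check_input() fix earlier in the patch (a lone LayerOutput is wrapped into a one-element list), the two spellings below are equivalent; `out` is the hypothetical layer from the config above.

.. code-block:: python

    # Both forms pass through check_input() and print the same layer.
    print_layer(input=out)
    print_layer(input=[out])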