提交 92ca98d5 编写于 作者: L liaogang

Merge remote-tracking branch 'upstream/master'

......@@ -2,6 +2,9 @@ language: cpp
cache: ccache
sudo: required
dist: trusty
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
addons:
apt:
packages:
......@@ -16,6 +19,7 @@ addons:
- python2.7-dev
- m4
- libprotobuf-dev
- doxygen
- protobuf-compiler
- python-protobuf
- python-numpy
......@@ -24,12 +28,10 @@ addons:
- libgflags-dev
- libgtest-dev
before_install:
- pip install wheel protobuf
- pip install wheel protobuf sphinx breathe recommonmark
- sudo paddle/scripts/travis/before_install.sh
script:
- paddle/scripts/travis/build.sh
- paddle/scripts/travis/unittest.sh
- paddle/scripts/travis/make_install.sh
- paddle/scripts/travis/main.sh
notifications:
email:
on_success: change
......
......@@ -25,7 +25,7 @@ repo or just head straight to the command line:
```shell
# Clone your fork to your local machine
git clone git@github.com:USERNAME/paddle.git
git clone git@github.com:USERNAME/Paddle.git
```
Then you can start to develop.
......@@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first:
# see the current configured remote repository
git remote -v
# add upstream repository
git remote add upstream https://github.com/paddle/paddle.git
git remote add upstream https://github.com/baidu/Paddle.git
# verify the new upstream
git remote -v
```
......
......@@ -9,6 +9,7 @@ Install PaddlePaddle
:glob:
install_*
internal/install_from_jumbo.md
Build from Source
-----------------
......
......@@ -5,3 +5,4 @@ Cluster Train
:glob:
opensource/cluster_train.md
internal/index.md
......@@ -245,10 +245,10 @@ addto_layer
:members: addto_layer
:noindex:
convex_comb_layer
linear_comb_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: convex_comb_layer
:members: linear_comb_layer
:noindex:
interpolation_layer
......@@ -280,7 +280,13 @@ tensor_layer
.. automodule:: paddle.trainer_config_helpers.layers
:members: tensor_layer
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
trans_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -341,12 +347,6 @@ rank_cost
:members: rank_cost
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
......
......@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc
.. toctree::
:maxdepth: 1
:glob:
源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst
install/ubuntu_install.rst
cmake/index.rst
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
......@@ -8,7 +8,7 @@ PaddlePaddle文档
* `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <../doc/cluster/index.html>`_
* `集群训练 <cluster/index.html>`_
开发指南
--------
......
......@@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R4:
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \
__macro(cudnnBatchNormalizationForwardInference) \
......@@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL "
......@@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_training failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *estimatedMean,
real *estimatedInvVar,
double epsilon) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc);
......@@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedInvVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) {
LOG(FATAL) << "savedMean and savedVar can be NULL "
......@@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
t_resource.cudnn_handle, mode, &alpha, &beta,
#if CUDNN_VERSION >= 5000
&alpha, &beta,
#endif
xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
bnDesc, scale, scaleGrad, biasGrad, epsilon,
savedMean, savedInvVar));
CHECK_SYNC("hl_batch_norm_backward failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh"
#include "hl_sequence.h"
#include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);
......
......@@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label;
output.resizeAndCopyFrom(arguments[0], false);
label.resizeAndCopyFrom(arguments[1], false);
output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions);
CHECK(label.ids);
......@@ -207,7 +207,7 @@ public:
real err = 0;
err = editDistance(
output.value->getData() + output.value->getWidth() * outputStarts[i],
output.value->getHeight(), output.value->getWidth(),
outputStarts[i+1] - outputStarts[i], output.value->getWidth(),
label.ids->getData() + labelStarts[i],
labelStarts[i + 1] - labelStarts[i]);
......@@ -224,6 +224,9 @@ public:
for (const std::string& name : config_.input_layers()) {
arguments.push_back(nn.getLayer(name)->getOutput());
}
}
// Accumulates into numSequences_ the number of sequences reported by the
// second argument (arguments[1]).
// NOTE(review): arguments[1] is presumably the label input, as in evalImp;
// no size check is performed here -- confirm callers always pass two arguments.
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
numSequences_ += arguments[1].getNumSequences();
}
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Flags.h"
......@@ -291,6 +290,8 @@ void RecurrentGradientMachine::init(
if (subModelConfig->evaluator_names_size() > 0) {
evaluator_.reset(frames_[0]->makeEvaluator());
}
targetInfoInlinkId_ = subModelConfig->target_inlinkid();
}
void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
......@@ -325,7 +326,7 @@ void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
for (int i = frames_.size(); i < numFrames; ++i) {
std::unique_ptr<NeuralNetwork> frame(
NeuralNetwork::newNeuralNetwork(subModelName_));
NeuralNetwork::newNeuralNetwork(subModelName_));
frame->init(config_, subParamInitCb);
for (auto& inFrameLine : inFrameLines_) {
......@@ -382,6 +383,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
size_t numSequences = input.getNumSequences();
const int* starts = input.sequenceStartPositions->getData(false);
bool hasSubseq = input.hasSubseq();
// In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
// same inframe info
bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1;
// By default, share info with the first inlink
if (shareInlinkInfo) {
targetInfoInlinkId_ = 0;
}
// check hasSubseq in both config and input are the same
CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq);
......@@ -394,9 +405,17 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
CHECK_EQ((size_t)input1.getNumSequences(), numSequences);
// check all inputs should have same hasSubseq flag
CHECK_EQ(input.hasSubseq(), inFrameLines_[0].hasSubseq);
CHECK_EQ(input1.getBatchSize(), batchSize);
CHECK(std::equal(starts, starts + numSequences + 1,
input1.sequenceStartPositions->getData(false)));
// If shareInlinkInfo, check that:
// 1. all inlinks have the same total number of tokens;
// 2. all inlinks have the same number of tokens for each sentence of each
// sample. If hasSubseq, one sample has multiple sentences; otherwise, one
// sample is one sentence.
if (shareInlinkInfo) {
CHECK_EQ(input1.getBatchSize(), batchSize);
CHECK(std::equal(starts, starts + numSequences + 1,
input1.sequenceStartPositions->getData(false)));
}
}
if (hasSubseq) {
......@@ -408,19 +427,46 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
for (size_t i = 1; i < inFrameLines_.size(); ++i) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences);
CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
input1.subSequenceStartPositions->getData(false)));
if (shareInlinkInfo) {
CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
input1.subSequenceStartPositions->getData(false)));
}
}
}
seqLengthAndStart_.clear();
input.getSeqLengthAndStart(&seqLengthAndStart_, &maxSequenceLength_);
info_.clear();
info_.resize(inFrameLines_.size());
seqInfos_.clear();
seqInfos_.resize(inFrameLines_.size());
{
AsyncGpuBlock asyncGpuBlock;
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
if (shareInlinkInfo) {
input.getSeqInfo(&seqInfos_[0]);
maxSequenceLength_ = seqInfos_[0][0].topLevelLength;
createInFrameInfo(0, input, passType);
} else {
for (size_t i = 0; i < inFrameLines_.size(); i++) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
input1.getSeqInfo(&seqInfos_[i]);
maxSequenceLength_ = seqInfos_[i][0].topLevelLength;
createInFrameInfo(i, input1, passType);
}
}
// inFrameLine select rows in real layer one time
for (size_t i = 0; i < inFrameLines_.size(); i++) {
int curInlinkId = shareInlinkInfo ? 0 : i;
selectRowsOneTime(inFrameLines_[i].inLayer, info_[curInlinkId].allIds,
&(inFrameLines_[i].outArg), passType);
}
}
resizeOrCreateFrames(maxSequenceLength_);
resizeBootFrame(numSequences);
AsyncGpuBlock asyncGpuBlock;
createInFrameInfo(input, passType);
for (auto& memoryFrameLine : memoryFrameLines_) {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
......@@ -443,23 +489,29 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
auto gatherAgent =
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
CHECK_NOTNULL(gatherAgent);
gatherAgent->copyIdAndSequenceInfo(input, info_.allIds, info_.idIndex);
gatherAgent->copyIdAndSequenceInfo(input, info_[targetInfoInlinkId_].allIds,
info_[targetInfoInlinkId_].idIndex);
}
for (int i = 0; i < maxSequenceLength_; ++i) {
int idSize = info_.idIndex[i + 1] - info_.idIndex[i];
int idSize = 0;
// connect in_links
for (auto& inFrameLine : inFrameLines_) {
for (size_t j = 0; j < inFrameLines_.size(); ++j) {
// idSize is the total number of tokens (scattered ids) belonging to frame i
idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i];
InFrameLine inFrameLine = inFrameLines_[j];
auto scatterAgent =
dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
inFrameLine.outArg, info_.allIds,
info_.idIndex[i], idSize);
inFrameLine.outArg, info_[j].allIds,
info_[j].idIndex[i], idSize);
if (hasSubseq) {
int size = info_.seqStartPosIndex[i + 1] - info_.seqStartPosIndex[i];
scatterAgent->setSequenceStartPositions(
info_.sequenceStartPositions, info_.seqStartPosIndex[i], size);
// size: the length of subsequence
int size =
info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i];
scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions,
info_[j].seqStartPosIndex[i],
size);
}
}
......@@ -469,13 +521,16 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
gatherAgent->addRealLayer(outFrameLine.frames[i]);
}
// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
// And inlinks that !hasSubSeq must have same inlink length.
idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i];
for (auto& memoryFrameLine : memoryFrameLines_) {
NeuralNetwork::connect(
memoryFrameLine.agents[i],
i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
idSize /*height of agent*/);
numSeqs_[i] /*height of agent*/);
}
}
......@@ -560,62 +615,77 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void RecurrentGradientMachine::createInFrameInfo(const Argument& input,
void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
const Argument& input,
PassType passType) {
bool hasSubseq = input.hasSubseq();
// numSequences: # samples(sequences) in a batch
size_t numSequences = input.getNumSequences();
std::vector<int> allIds;
info_.idIndex.clear();
info_.idIndex.push_back(0); // first idIndex = 0
if (hasSubseq) { // for sequenceScatterAgentLayer
size_t numSubSequences = input.getNumSubSequences();
std::vector<int> sequenceStartPositions;
info_.seqStartPosIndex.clear();
info_.seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
for (int i = 0; i < maxSequenceLength_; ++i) {
sequenceStartPositions.push_back(0); // first element = 0
for (size_t j = 0; j < numSubSequences; ++j) {
if (std::get<3>(seqLengthAndStart_[j]) == i) {
int subSeqStart = std::get<1>(seqLengthAndStart_[j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[j]);
for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqLength);
}
}
info_.idIndex.push_back(allIds.size());
info_.seqStartPosIndex.push_back(sequenceStartPositions.size());
auto& seqInfo = seqInfos_[inlinkId];
numSeqs_.clear();
Info* inlinkInfo = &info_[inlinkId];
inlinkInfo->idIndex.clear();
inlinkInfo->idIndex.push_back(0); // first idIndex = 0
std::vector<int> sequenceStartPositions;
const int* subSequenceStartPositions = nullptr;
if (hasSubseq) { // for sequenceScatterAgentLayer
subSequenceStartPositions =
input.subSequenceStartPositions->getData(false);
inlinkInfo->seqStartPosIndex.clear();
inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
}
// maxSequenceLength_: max topLevelLength over all samples
for (int i = 0; i < maxSequenceLength_; ++i) {
if (hasSubseq) {
sequenceStartPositions.push_back(0); // first element = 0
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(sequenceStartPositions.size(),
maxSequenceLength_ + numSubSequences);
CHECK_EQ(info_.seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &info_.sequenceStartPositions);
} else { // for scatterAgentLayer
for (int i = 0; i < maxSequenceLength_; ++i) {
for (size_t j = 0; j < numSequences; ++j) {
int seqLength = std::get<0>(seqLengthAndStart_[j]);
if (i >= seqLength) {
break;
int numSeqs = 0;
for (size_t j = 0; j < numSequences; ++j) {
int seqLength = seqInfo[j].topLevelLength;
if (i >= seqLength) {
break;
}
++numSeqs;
if (hasSubseq) {
int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
int subSeqEnd =
subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
for (int k = subSeqStart; k < subSeqEnd; ++k) {
allIds.push_back(k);
}
int seqStart = std::get<1>(seqLengthAndStart_[j]);
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqEnd - subSeqStart);
} else {
int seqStart = seqInfo[j].seqStart;
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
}
info_.idIndex.push_back(allIds.size());
}
inlinkInfo->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
if (hasSubseq) {
inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
}
}
// copy and check scatterId
copyScattedId(allIds, &info_.allIds, input.getBatchSize());
CHECK_EQ(info_.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
// inFrameLine select rows in real layer one time
for (auto& inFrameLine : inFrameLines_) {
selectRowsOneTime(inFrameLine.inLayer, info_.allIds, &inFrameLine.outArg,
passType);
if (hasSubseq) {
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(sequenceStartPositions.size(),
maxSequenceLength_ + input.getNumSubSequences());
CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
}
// copy and check scatterId
copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
CHECK_EQ(inlinkInfo->idIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
}
/* like createInFrameInfo, but for all realLayer of memoryFrameLines*/
......@@ -633,19 +703,20 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
sequenceStartPositions.push_back(0); // first element = 0
const int* starts = input.sequenceStartPositions->getData(false);
for (size_t i = 0; i < numSequences; ++i) {
int seqId = std::get<2>(seqLengthAndStart_[i]);
// memory info adopt info of inlinks[0]
int seqId = seqInfos_[0][i].seqId;
for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
starts[seqId + 1] - starts[seqId]);
starts[seqId + 1] - starts[seqId]);
}
createSeqPos(sequenceStartPositions,
&(*memoryFrameLine).sequenceStartPositions);
} else { // for scatterAgentLayer
for (size_t i = 0; i < numSequences; ++i) {
allIds.push_back(std::get<2>(seqLengthAndStart_[i]));
allIds.push_back(seqInfos_[0][i].seqId);
}
}
// copy and check scatterId
......@@ -699,18 +770,19 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
for (auto& memoryFrameLine : memoryFrameLines_) {
if (!memoryFrameLine.rootLayer) continue;
Argument& bootArg = memoryFrameLine.rootLayer->getOutput();
size_t batchSize = memoryFrameLine.is_sequence ?
bootArg.getNumSequences() : bootArg.getBatchSize();
size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences()
: bootArg.getBatchSize();
if (numSequences) {
CHECK_EQ(numSequences, batchSize);
} else {
numSequences = batchSize;
}
}
CHECK(numSequences) << "Fail to get batch size in generation. "
"At least one of the Memory layer MUST have a layer that is NOT in "
"the layer group to boot it, and this boot layer is used to "
"decide batch_size in generation process.";
CHECK(numSequences)
<< "Fail to get batch size in generation. "
"At least one of the Memory layer MUST have a layer that is NOT in "
"the layer group to boot it, and this boot layer is used to "
"decide batch_size in generation process.";
return numSequences;
}
......@@ -732,7 +804,9 @@ void RecurrentGradientMachine::generateSequence() {
// connect boot frame memory links
std::vector<int> ids(numSequences);
for (size_t i = 0; i < numSequences; ++i) { ids[i] = i; }
for (size_t i = 0; i < numSequences; ++i) {
ids[i] = i;
}
for (auto& memoryFrameLine : memoryFrameLines_) {
if (memoryFrameLine.rootAgent) {
auto scatterAgent =
......@@ -756,7 +830,8 @@ void RecurrentGradientMachine::generateSequence() {
// init outArg
size_t resultNum = generator_.config.num_results_per_sample();
IVector::resizeOrCreate(generator_.outArg.ids,
IVector::resizeOrCreate(
generator_.outArg.ids,
generator_.config.max_num_frames() * numSequences * resultNum, false);
if (resultNum > 1) {
CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
......@@ -847,7 +922,9 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
// path.seqId = -1 indicates end of generation
// of an input sequence
finalPaths[seqIds_[j]].seqId = -1;
} else { scatterIds.push_back(j); }
} else {
scatterIds.push_back(j);
}
}
}
......@@ -856,13 +933,12 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
starts[0] = 0;
generator_.ids.clear();
for (size_t i = 0; i < batchSize; ++i) {
generator_.ids.insert(generator_.ids.end(),
finalPaths[i].ids.begin(),
generator_.ids.insert(generator_.ids.end(), finalPaths[i].ids.begin(),
finalPaths[i].ids.end());
starts[i + 1] = generator_.ids.size();
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
finalPaths[i].machineIdVec.begin(),
finalPaths[i].machineIdVec.end());
finalPaths[i].machineIdVec.begin(),
finalPaths[i].machineIdVec.end());
}
}
......@@ -920,9 +996,9 @@ void RecurrentGradientMachine::forwardFrame(int machineCur) {
}
}
void RecurrentGradientMachine::singlePathExpand(
Path& curPath, size_t curPathId, std::vector<Path>& newPaths,
size_t expandWidth) {
void RecurrentGradientMachine::singlePathExpand(Path& curPath, size_t curPathId,
std::vector<Path>& newPaths,
size_t expandWidth) {
int calc_id =
gDiyProbStart ? gDiyProbStart(curPath.ids.size(), curPath.ids.data()) : 0;
......@@ -946,19 +1022,20 @@ void RecurrentGradientMachine::singlePathExpand(
if (id == -1) break;
real newLogProb = generator_.config.log_prob() ? std::log(prob) : prob;
Path newPath(curPath, id, newLogProb,
curPathId /*machineId*/, k /*topIndex*/);
Path newPath(curPath, id, newLogProb, curPathId /*machineId*/,
k /*topIndex*/);
if (this->beamSearchCtrlCallbacks_) {
if (beamSearchCtrlCallbacks_->stopDetermineCandidates(
newPath.seqId, newPath.ids, newPath.probHistory)) return;
newPath.seqId, newPath.ids, newPath.probHistory))
return;
}
// outFrameLines_.size() > 1UL
if (dataArgsSize_) {
newPath.machineIdVec = curPath.machineIdVec;
newPath.machineIdVec.push_back(curPathId);
}
bool atEos = eosVec[index] == 1U ||
newPath.ids.size() >= (size_t)maxSequenceLength_;
bool atEos =
eosVec[index] == 1U || newPath.ids.size() >= (size_t)maxSequenceLength_;
// adjustNewPath
newPath.adjustProb(calc_id, atEos);
if (this->beamSearchCtrlCallbacks_) {
......@@ -966,16 +1043,18 @@ void RecurrentGradientMachine::singlePathExpand(
newPath.seqId, newPath.ids, newPath.probHistory, &newPath.logProb);
}
if (!newPath.isDropable()) {
atEos ? finalPaths_[curPath.seqId].push_back(newPath) :
newPaths.push_back(newPath);
atEos ? finalPaths_[curPath.seqId].push_back(newPath)
: newPaths.push_back(newPath);
}
} // for expandWidth
if (gDiyProbStop) { gDiyProbStop(calc_id); }
if (gDiyProbStop) {
gDiyProbStop(calc_id);
}
}
void RecurrentGradientMachine::beamExpand(
std::vector<Path>& paths, std::vector<Path>& newPaths) {
void RecurrentGradientMachine::beamExpand(std::vector<Path>& paths,
std::vector<Path>& newPaths) {
size_t candidatePathCount = paths.size();
// idVec.size() could be larger than candidatePathCount * beam,
// so user can drop some node customly.
......@@ -988,7 +1067,7 @@ void RecurrentGradientMachine::beamExpand(
int curSeqId = 0;
for (size_t j = 0; j <= candidatePathCount; j++) {
// expansions of a single sequence are all processed
curSeqId = (j < candidatePathCount? paths[j].seqId : curSeqId + 1);
curSeqId = (j < candidatePathCount ? paths[j].seqId : curSeqId + 1);
if (prevSeqId != -1 && curSeqId != prevSeqId) {
totalExpandCount += beamShrink(newPaths, prevSeqId, totalExpandCount);
}
......@@ -1000,11 +1079,14 @@ void RecurrentGradientMachine::beamExpand(
}
// Drop extra nodes to beam size.
size_t RecurrentGradientMachine::beamShrink(
std::vector<Path>& newPaths, size_t seqId, size_t totalExpandCount) {
size_t minNewPathSize = std::min(getBeamSize(),
newPaths.size() - totalExpandCount);
if (!minNewPathSize) { return 0; }
size_t RecurrentGradientMachine::beamShrink(std::vector<Path>& newPaths,
size_t seqId,
size_t totalExpandCount) {
size_t minNewPathSize =
std::min(getBeamSize(), newPaths.size() - totalExpandCount);
if (!minNewPathSize) {
return 0;
}
std::nth_element(newPaths.begin() + totalExpandCount,
newPaths.begin() + totalExpandCount + minNewPathSize,
newPaths.end(), Path::greaterPath);
......@@ -1017,11 +1099,8 @@ size_t RecurrentGradientMachine::beamShrink(
// Remove the already formed paths that are relatively short
finalPaths_[seqId].erase(
std::remove_if(finalPaths_[seqId].begin(),
finalPaths_[seqId].end(),
[&](Path& p) {
return p.logProb < minPathLogProb;
}),
std::remove_if(finalPaths_[seqId].begin(), finalPaths_[seqId].end(),
[&](Path& p) { return p.logProb < minPathLogProb; }),
finalPaths_[seqId].end());
for (auto p : finalPaths_[seqId]) {
if (minFinalPathLogProb_[seqId] > p.logProb) {
......@@ -1030,7 +1109,7 @@ size_t RecurrentGradientMachine::beamShrink(
}
if (finalPaths_[seqId].size() >= getBeamSize() &&
minFinalPathLogProb_[seqId] >= maxPathLogProb) {
minFinalPathLogProb_[seqId] >= maxPathLogProb) {
newPaths.resize(totalExpandCount);
return 0;
}
......@@ -1067,7 +1146,8 @@ void RecurrentGradientMachine::fillGenOutputs() {
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(), path.machineIdVec.end());
path.machineIdVec.begin(),
path.machineIdVec.end());
}
}
starts[i + 1] = generator_.ids.size();
......@@ -1091,21 +1171,21 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
void RecurrentGradientMachine::createDataOutlink(
std::vector<int>& machineIdVec) {
size_t seqNum = getBeamSize() > 1UL ?
finalPaths_.size() : finalPaths_[0].size();
size_t seqNum =
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0);
for (size_t i = 0; i < seqNum; ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() :
finalPaths_[0][i].ids.size();
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
: finalPaths_[0][i].ids.size();
starts[i + 1] = starts[i] + seqLen;
}
for (size_t i = 0; i < dataArgsSize_; i++) {
dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec,
starts, useGpu_, HPPL_STREAM_1, PASS_TEST);
dataArgs_[i].concat(dataArgsFrame_[i], machineIdVec, starts, useGpu_,
HPPL_STREAM_1, PASS_TEST);
auto dataAgent = dynamic_cast<DataLayer*>(
outFrameLines_[i + 1].agentLayer.get());
auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent);
dataAgent->setData(dataArgs_[i]);
}
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "GradientMachine.h"
......@@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped.
*/
typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback;
const std::vector<real>&)> DropCallback;
/**
* @brief NormOrDropNodeCallback
......@@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path.
*/
typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback;
std::vector<real>&, real*)> NormOrDropNodeCallback;
/**
* @brief Register beam search control callbacks. Used for prediction.
......@@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation
int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec;
/**
......@@ -206,7 +205,10 @@ public:
/**
* @brief Path default ctor, first logProb is 0.
*/
Path() { logProb = 0; seqId = 0; }
Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/**
......@@ -319,21 +321,33 @@ protected:
};
std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows.
// Each inFrameLines(inlinks) has its own info(elements) below,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
Info info_;
std::vector<Info> info_;
// numSeqs_[i] is the number of sequences that are longer than i (for sequence
// data) or that have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
void createInFrameInfo(const Argument& input, PassType passType);
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
/* Create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -363,6 +377,9 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences (subsequences) per sample in the batch
// else: max number of tokens per sample (sentence) in the batch
int maxSequenceLength_;
bool useGpu_;
bool stopBeamSearch_;
......@@ -415,7 +432,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink(std::vector<int> & machineIdVec);
void createDataOutlink(std::vector<int>& machineIdVec);
/*
* @brief used in beam search, connect previous frame to form recurrent link
......
......@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else {
forwardImp(getInput(0), getInput(1));
......@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else {
backwardImp(callback, getInput(0), getInput(1));
}
......
......@@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_);
outputH_ * outputW_ * numFilters_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
......@@ -21,18 +21,20 @@ limitations under the License. */
namespace paddle {
/**
* @brief A layer for convex weighted average of vectors,
* @brief A layer for weighted sum of vectors,
* which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND
* TRANSLATE
* - Input: the first input contains the convex weights (batchSize x weightDim),
* and the shape of second input is (batchSize x (weightdim*dataDim)).
* - Output: the shape of output is (batchSize x dataDim).
* - Input: the size of the first input is weightDim,
* and the size of the second input is weightDim * dataDim.
* - Output: the size of the output is dataDim.
* \f[
* out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)),
* i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1)
* out(j) = \sum_{i}(in0(i) * in1(j + i * dataDim)),
* i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1)
* \f]
* Note that the above computation is for one sample. Multiple samples are
* processed in one batch.
*
* The config file api is convex_comb_layer.
* The config file api is linear_comb_layer.
*/
class ConvexCombinationLayer : public Layer {
protected:
......
......@@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) {
REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
MatrixPtr prevOut1 = getInputValue(0);
MatrixPtr prevOut2 = getInputValue(1);
outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_);
outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
}
}
......@@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) {
outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0),
*getInputValue(1), *getInputGrad(0),
*getInputGrad(1), kCosSimScale_);
*getInputGrad(1), config_.cos_scale());
}
}
......
......@@ -36,7 +36,7 @@ namespace paddle {
class CosSimLayer : public Layer {
public:
explicit CosSimLayer(const LayerConfig& config)
: Layer(config), kCosSimScale_(5.0f) {}
: Layer(config) {}
~CosSimLayer() {}
......@@ -44,8 +44,6 @@ public:
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
const real kCosSimScale_;
};
} // namespace paddle
......@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}
......
......@@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad()) {
create(tmpWGrad_, 1, channels_, &gammaGrad);
}
if (biases_ && biases_->getWGrad()) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}
}
hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad,
ioDesc_, inGrad, bnParamDesc_,
gamma, gammaGrad, betaGrad,
EPS, savedMean, savedInvVar);
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad() && biases_->getWGrad()) {
weight_->getWGrad()->add(*tmpWGrad_);
biases_->getWGrad()->add(*tmpBiasGrad_);
}
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
biases_->getParameterPtr()->incUpdate(callback);
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {

/**
 * @brief Debugging layer: on every forward pass it writes the contents of
 * each of its inputs to the INFO log. Its backward pass does nothing.
 */
class PrintLayer : public Layer {
public:
  explicit PrintLayer(const LayerConfig& config) : Layer(config) {}

  void forward(PassType passType);

  // Intentionally empty.
  void backward(const UpdateCallback& callback) {}
};

void PrintLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t numInputs = inputLayers_.size();
  for (size_t idx = 0; idx < numInputs; ++idx) {
    const auto& in = getInput(idx);
    const std::string& layerName = inputLayers_[idx]->getName();

    // Dense/sparse value matrix, if the input carries one.
    if (in.value) {
      std::ostringstream text;
      in.value->print(text);
      LOG(INFO) << "layer=" << layerName << " value matrix:\n" << text.str();
    }

    // Integer id vector, printed in full.
    if (in.ids) {
      std::ostringstream text;
      in.ids->print(text, in.ids->getSize());
      LOG(INFO) << "layer=" << layerName << " ids vector:\n" << text.str();
    }

    // Sequence start offsets (getVector is called with false, as before).
    auto seqStarts = in.sequenceStartPositions;
    if (seqStarts) {
      std::ostringstream text;
      seqStarts->getVector(false)->print(text, seqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sequence pos vector:\n"
                << text.str();
    }

    // Sub-sequence start offsets, same treatment.
    auto subSeqStarts = in.subSequenceStartPositions;
    if (subSeqStarts) {
      std::ostringstream text;
      subSeqStarts->getVector(false)->print(text, subSeqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sub-sequence pos vector:\n"
                << text.str();
    }
  }
}

REGISTER_LAYER(print, PrintLayer);

}  // namespace paddle
......@@ -52,8 +52,10 @@ public:
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]);
} else {
forwardImp(getInput(0));
......
......@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
# Toy samples shared by the two providers below. Each entry is
# [list of sub-sequences of integer ids, integer label].
data = [
    [[[1, 3, 2], [4, 5, 2]], 0],
    [[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
                       integer_value(2)])
def process_subseq(settings, file_name):
    """
    Yield every entry of ``data`` unchanged, i.e. with its nested
    sub-sequence structure intact. ``settings`` and ``file_name`` are unused.
    """
    for index in range(len(data)):
        yield data[index]
@provider(input_types=[integer_value_sequence(10),
                       integer_value(2)])
def process_seq(settings, file_name):
    """
    Yield each entry of ``data`` with its sub-sequences concatenated into one
    flat sequence, paired with the entry's label. ``settings`` and
    ``file_name`` are unused.
    """
    for subseqs, label in data:
        flat = [word for subseq in subseqs for word in subseq]
        yield flat, label
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
    # Step function of the outer recurrent group; x is one outer-step input.
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
    def inner_step(y):
        # The inner memory shares its name with the fc layer below and is
        # booted from the outer memory.
        inner_mem = memory(name="inner_rnn_state",
                           size=hidden_dim,
                           boot_layer=outer_mem)
        out = fc_layer(input=[y, inner_mem],
                       size=hidden_dim,
                       act=TanhActivation(),
                       bias_attr=True,
                       name="inner_rnn_state")
        return out
    inner_rnn_output = recurrent_group(
        step=inner_step,
        name="inner",
        input=x)
    # `last` is never read, but the call must stay: it creates the layer named
    # "outer_rnn_state", the same name used by outer_mem above.
    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
    # "return last" should also work. But currently RecurrentGradientMachine
    # does not handle it correctly. Current implementation requires that
    # all the out links are from sequences. However, it does not report error
    # when the out links are not sequences.
    return inner_rnn_output
# Outer recurrent group over the embedding, wrapped in SubsequenceInput.
out = recurrent_group(
    name="outer",
    step=outer_step,
    input=SubsequenceInput(emb))
# Classify from the last element of the group output.
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
                input=rep,
                act=SoftmaxActivation(),
                bias_attr=True)
outputs(classification_cost(input=prob,
                            label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
    # One recurrent step: a tanh fc layer over the current input and the
    # memory that shares the layer's name ("rnn_state").
    prev_state = memory(name="rnn_state", size=hidden_dim)
    return fc_layer(
        input=[y, prev_state],
        size=hidden_dim,
        act=TanhActivation(),
        bias_attr=True,
        name="rnn_state")
# Single-level recurrent group over the embedding.
out = recurrent_group(
    name="rnn",
    step=step,
    input=emb)
# Classify from the last element of the group output.
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
                input=rep,
                act=SoftmaxActivation(),
                bias_attr=True)
outputs(classification_cost(input=prob,
                            label=data_layer(name="label", size=label_dim)))
......@@ -87,18 +87,31 @@ void testEvaluator(TestConfig testConf, string testEvaluatorName,
return;
}
ICpuGpuVectorPtr sequenceStartPositions;
if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL) {
if (!sequenceStartPositions) {
generateSequenceStartPositions(batchSize, sequenceStartPositions);
}
data.sequenceStartPositions = sequenceStartPositions;
}
arguments.push_back(data);
}
Evaluator* testEvaluator = Evaluator::create(testConf.evaluatorConfig);
double totalScore = 0.0;
testEvaluator->start();
totalScore += testEvaluator->evalImp(arguments);
testEvaluator->updateSamplesNum(arguments);
testEvaluator->finish();
LOG(INFO) << *testEvaluator;
double totalScore2 = 0.0;
if (testConf.testAccumulate) {
testEvaluator->start();
totalScore2 += testEvaluator->evalImp(arguments);
testEvaluator->finish();
EXPECT_LE(fabs(totalScore - totalScore2), 1.0e-5);
}
}
......@@ -202,6 +215,15 @@ TEST(Evaluator, precision_recall) {
false);
}
// Exercises the "ctc_edit_distance" evaluator with one sequence-data input
// named "output" and one sequence-label input named "label".
// NOTE(review): 32 and 1 are presumably the input dimensions and 100 the
// batch size -- confirm against TestConfig / testEvaluatorAll.
TEST(Evaluator, ctc_error_evaluator) {
  TestConfig config;
  config.evaluatorConfig.set_type("ctc_edit_distance");
  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "output", 32});
  config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "label", 1});
  testEvaluatorAll(config, "ctc_error_evaluator", 100);
}
int main(int argc, char** argv) {
initMain(argc, argv);
FLAGS_thread_local_rand_use_global_seed = true;
......
......@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT
using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer {
......@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim);
// vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain();
for (int i = 0; i < num_passes; ++i) {
......@@ -88,27 +92,39 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str());
}
TEST(RecurrentGradientMachine, HasSubSequence) {
void test(const string& conf1, const string& conf2, double eps) {
int num_passes = 5;
real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes);
for (int i = 0; i < num_passes; i++) {
LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i]
<< ", cost2=" << cost2[i];
ASSERT_NEAR(cost1[i], cost2[i], 1e-3);
<< ", cost2=" << cost2[i]
<< ", diff=" << std::abs(cost1[i] - cost2[i]);
ASSERT_NEAR(cost1[i], cost2[i], eps);
}
delete[] cost1;
delete[] cost2;
}
// The flat and the nested layer-group configs must yield per-pass costs that
// agree to within 1e-5.
TEST(RecurrentGradientMachine, HasSubSequence) {
  test("gserver/tests/sequence_layer_group.conf",
       "gserver/tests/sequence_nest_layer_group.conf",
       1e-5);
}
// The simple RNN config and its nested (sub-sequence) counterpart are passed
// with a tolerance of 0, i.e. their per-pass costs must match exactly.
TEST(RecurrentGradientMachine, rnn) {
  test("gserver/tests/sequence_rnn.conf",
       "gserver/tests/sequence_nest_rnn.conf",
       0);
}
int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) {
......
......@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
Argument& cpuInput = testCpu.dataLayer_->getOutput();
Argument& gpuInput = testGpu.dataLayer_->getOutput();
gpuInput.resizeAndCopyFrom(cpuInput, true);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
......@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
if (!matrix) {
matrix = Matrix::create(height, width, trans, useGpu);
} else {
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width);
}
}
......@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
} else {
CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
dynamic_cast<GpuSparseMatrix*>(matrix.get()));
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width, nnz, valueType, format);
}
}
......
......@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
} else if ((!useGpu) && (!cpuVectorT_)) {
cpuVectorT_ = VectorT<T>::create(size, false);
} else {
CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
this->resize(size, useGpu);
}
}
......
......@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
if (!dest) {
dest = src->clone(0, 0, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(src->getHeight(), src->getWidth());
}
dest->copyFrom(*src, stream);
......@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) {
CHECK_LE((size_t)startRow + copySize, src->getHeight());
int height = copySize;
int width = src->getWidth();
if (!dest) {
dest = src->clone(height, width, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(height, width);
}
MatrixPtr submat = src->subMatrix(startRow, copySize);
......@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
}
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
hl_stream_t stream) {
dataId = src.dataId;
......@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
resizeAndCopy(strs, src.strs, useGpu, stream);
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu) {
int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return size;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu,
hl_stream_t stream) {
......@@ -463,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
}
}
void Argument::getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
int* maxSequenceLength) const {
void Argument::getSeqInfo(std::vector<SeqInfo>* seqInfo) const {
const int* starts = sequenceStartPositions->getData(false);
if (hasSubseq()) {
size_t numSubSequences = getNumSubSequences();
(*seqLengthAndStart).reserve(numSubSequences);
const int* subStarts = subSequenceStartPositions->getData(false);
int seqIndex = 0;
int subSeqIndex = 0;
*maxSequenceLength = 0;
for (size_t i = 0; i < numSubSequences; ++i) {
if (subStarts[i] == starts[seqIndex]) {
subSeqIndex = 0;
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
subStarts[i + 1] - subStarts[i], (int)subStarts[i],
(int)seqIndex, (int)subSeqIndex));
++subSeqIndex;
++seqIndex;
} else if (subStarts[i] < starts[seqIndex]) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
subStarts[i + 1] - subStarts[i], (int)subStarts[i],
(int)seqIndex - 1, (int)subSeqIndex));
++subSeqIndex;
const int* subStarts = hasSubseq()
? subSequenceStartPositions->getData(false) : nullptr;
size_t numSequences = getNumSequences();
seqInfo->reserve(numSequences);
int subSeqEnd = 0;
for (size_t i = 0; i < numSequences; ++i) {
SeqInfo info;
info.seqStart = starts[i];
info.subLevelLength = starts[i + 1] - starts[i];
info.seqId = i;
if (hasSubseq()) {
info.subSeqStart = subSeqEnd;
while (subStarts[subSeqEnd] < starts[i + 1]) {
++subSeqEnd;
}
// maxSequenceLength_ = 1 + max(subSeqIndex) in each Seq.
if (*maxSequenceLength < std::get<3>((*seqLengthAndStart)[i]))
*maxSequenceLength = std::get<3>((*seqLengthAndStart)[i]);
}
*maxSequenceLength += 1;
} else {
size_t numSequences = getNumSequences();
(*seqLengthAndStart).reserve(numSequences);
for (size_t i = 0; i < numSequences; ++i) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
starts[i + 1] - starts[i], (int)starts[i], (int)i, (int)i));
info.topLevelLength = subSeqEnd - info.subSeqStart;
} else {
info.topLevelLength = info.subLevelLength;
info.subSeqStart = 0; // not used
}
std::sort((*seqLengthAndStart).begin(), (*seqLengthAndStart).end(),
std::greater<std::tuple<int, int, int, int>>());
*maxSequenceLength = std::get<0>((*seqLengthAndStart)[0]);
seqInfo->push_back(info);
}
std::sort(seqInfo->begin(), seqInfo->end(),
[](const SeqInfo& a, const SeqInfo& b) {
return a.topLevelLength > b.topLevelLength;
});
}
void Argument::checkSubset() const {
......
......@@ -203,13 +203,28 @@ struct Argument {
* startSeq: the sample id of start
* copySize: how many samples need to copy
* return value: how many samples are copied
* Note that when specifying the stream explicitly in this case,
* synchronize should also be called somewhere after this function
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
int32_t copySize, bool useGpu, hl_stream_t stream);
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
/*
* same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu);
void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
/*
* same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
/*
@brief Concatenate several arguments into one and put the result into it.
......@@ -238,12 +253,29 @@ struct Argument {
static void splitByDataId(const std::vector<Argument>& argus,
std::vector<std::vector<Argument>>* arguGroups);
// Per-sequence bookkeeping record filled in by getSeqInfo().
struct SeqInfo {
  // Equal to sequence length for sequence data
  // Equal to number of subsequences for subsequence data
  int topLevelLength;

  // Start position of this sequence (its entry in sequenceStartPositions)
  int seqStart;

  // Index of this sequence in the argument, before sorting
  int seqId;

  // Equal to topLevelLength for sequence data
  // Equal to sum of the length of subsequences for subsequence data
  int subLevelLength;

  // Only used for subsequence data: index of this sequence's start position
  // in subSequenceStartPositions, i.e.
  // subSequenceStartPositions[subSeqStart] == seqStart
  int subSeqStart;
};
/*
Get Sequence Length, startPositions and max Length according to input
*/
void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
int* maxSequenceLength) const;
Get SeqInfo for each sequence of this argument
Elements in *seqInfo are sorted by topLevelLength in descending order
*/
void getSeqInfo(std::vector<SeqInfo>* segInfo) const;
/*
Check Whether sequenceStartPositions is subset of
subSequenceStartPositions.
......
#!/bin/bash
# Travis job: configure a CPU-only debug build with tests enabled, build it,
# run the test suite, then install and run `paddle version`.

# Quoted so the script still works when its path contains spaces.
cd "$(dirname "$0")"
source ./common.sh
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON
make -j "$(nproc)"
env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j $(nproc)"
sudo make install
sudo paddle version
#!/bin/bash
# Travis job: build the English and Chinese documentation and, for pushes to
# the master branch only, publish the generated HTML to the gh-pages branch.

# Run from this script's directory so ./common.sh resolves regardless of the
# caller's working directory (the sibling travis scripts do the same).
cd "$(dirname "$0")"

# Add set -e, cd to directory.
source ./common.sh

# Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
make paddle_docs paddle_docs_cn

# Parse Github URL
REPO=$(git config remote.origin.url)
SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:}
SHA=$(git rev-parse --verify HEAD)

# Documentation branch name
# gh-pages branch is used for PaddlePaddle.org. The English version of
# documentation in `doc` directory, and the chinese version in `doc_cn`
# directory.
TARGET_BRANCH="gh-pages"

# Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master"

# Stop here for pull requests and for branches other than master.
if [ "$TRAVIS_PULL_REQUEST" != "false" ] || [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
  exit 0
fi

# Clone the repo to output directory
git clone "$REPO" output
cd output

# checkout github page branch
git checkout "$TARGET_BRANCH" || git checkout --orphan "$TARGET_BRANCH"

# remove old docs. mv new docs.
rm -rf doc doc_cn
mv ../doc_cn/html doc_cn
mv ../doc/html doc

# Check is there anything changed.
# Stage first and compare the index: a plain `git diff` only looks at tracked
# files, so pages that are new in this build would otherwise go unnoticed.
git add .
if git diff --cached --quiet; then
  echo "No changes to the output on this push; exiting."
  exit 0
fi
# Make sure the deploy steps below abort on the first failure.
set -e

# Commit
git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}"

# Set ssh private key
openssl aes-256-cbc -K "$SSL_KEY" -iv "$SSL_IV" -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key
eval "$(ssh-agent -s)"
ssh-add deploy_key

# Push
git push "$SSH_REPO" "$TARGET_BRANCH"
#!/bin/bash
# Travis entry point: run the script that matches the JOB environment
# variable, or fail with exit status 1 if JOB is not a known job.

# Quoted so the script still works when its path contains spaces.
cd "$(dirname "$0")"

# JOB is quoted so an empty or unset value is compared as an empty string
# instead of producing a `[` syntax error.
if [ "${JOB}" = "BUILD_AND_TEST" ]; then
  ./build_and_test.sh
elif [ "${JOB}" = "DOCS" ]; then
  ./docs.sh
else
  echo "Unknown job ${JOB}"
  exit 1
fi
#!/bin/bash
# Install the build and run `paddle version` against the installed command.

# Quoted so the script still works when its path contains spaces.
cd "$(dirname "$0")"
source ./common.sh
sudo make install
sudo paddle version
#!/bin/bash
# Run the test suite in parallel, printing the output of failing tests.

# Quoted so the script still works when its path contains spaces.
cd "$(dirname "$0")"
source ./common.sh
env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j $(nproc)"
......@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11;
// the id of inlink which share info with outlinks, used in recurrent layer group
optional int32 target_inlinkid = 12;
}
message ModelConfig {
......
......@@ -22,6 +22,8 @@ find_python_module(pip REQUIRED)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
add_subdirectory(paddle/trainer_config_helpers/tests)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
DESTINATION opt/paddle/share/wheels
)
......@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed=False):
seq_reversed=False,
target_inlinkname=""):
global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn")
......@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for link in in_links:
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
......@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
......@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links,
out_links,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed)
seq_reversed,
target_inlinkname)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......@@ -1399,6 +1408,14 @@ class SelectiveFCLayer(LayerBase):
input_index, psize, dims, sparse, format)
self.create_bias_parameter(bias, self.config.size)
@config_layer('print')
class PrintLayer(LayerBase):
    """
    Config-side definition of the 'print' layer type. It passes its name and
    inputs straight to LayerBase with a layer size of 0.
    """
    def __init__(
            self,
            name,
            inputs):
        super(PrintLayer, self).__init__(name, 'print', 0, inputs)
@config_layer('data')
class DataLayer(LayerBase):
def __init__(
......@@ -1614,7 +1631,7 @@ class BatchNormLayer(LayerBase):
# Also based on cudnn version.
use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \
((not parallel_nn) or self.config.device > -1) and \
cudnn_version >= 4000
cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__(name, self.layer_type, 0,
active_type=active_type,
......@@ -2264,6 +2281,9 @@ class ConvexCombinationLayer(LayerBase):
name, 'convex_comb', size, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2,
'ConvexCombinationLayer must have 2 inputs')
config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for ConvexCombinationLayer')
self.set_layer_size(size)
@config_layer('interpolation')
......@@ -2313,6 +2333,9 @@ class CosSimVecMatLayer(LayerBase):
self.config.cos_scale = cos_scale
config_assert(len(self.inputs) == 2,
'CosSimVecMatLayer must have 2 inputs')
config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for CosSimVecMatLayer')
@config_layer('sampling_id')
class SamplingIdLayer(LayerBase):
......@@ -2361,6 +2384,7 @@ class CosSimLayer(LayerBase):
self,
name,
inputs,
cos_scale=5,
device=None):
super(CosSimLayer, self).__init__(
name, 'cos', 1, inputs=inputs, device=device)
......@@ -2368,6 +2392,7 @@ class CosSimLayer(LayerBase):
config_assert(
self.get_input_layer(0).size == self.get_input_layer(1).size,
'inputs of CosSimLayer must have same dim')
self.config.cos_scale = cos_scale
@config_layer('tensor')
......
......@@ -94,7 +94,7 @@ def evaluator_base(
Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval:
classification_error_evaluator=0.4486
CurrentEval: ErrorRate=0.3964
:param input: Input layers, a object of LayerOutput or a list of
LayerOutput.
:type input: list|LayerOutput
......@@ -296,6 +296,7 @@ def precision_recall_evaluator(
@wrap_name_default()
def ctc_error_evaluator(
input,
label,
name=None,
):
"""
......@@ -305,16 +306,20 @@ def ctc_error_evaluator(
.. code-block:: python
eval = ctc_error_evaluator(input)
eval = ctc_error_evaluator(input=input, label=lbl)
:param name: Evaluator name.
:type name: None|basestring
:param input: Input Layer.
:param input: Input Layer. Should be the same as the input for ctc_layer.
:type input: LayerOutput
:param label: input label, which is a data_layer. Should be the same as the
label for ctc_layer
:type label: LayerOutput
"""
evaluator_base(name=name,
type="ctc_edit_distance",
input=input)
input=input,
label=label)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default()
......
......@@ -21,7 +21,6 @@ from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType
from .attrs import *
from .default_decorators import *
try:
import cPickle as pickle
except ImportError:
......@@ -47,11 +46,12 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer',
'tensor_layer', 'selective_fc_layer', 'sampling_id_layer',
'slope_intercept_layer', 'trans_full_matrix_projection',
'linear_comb_layer',
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy',
'rank_cost', 'lambda_cost', 'huber_cost',
'block_expand_layer',
'block_expand_layer', 'out_prod_layer', 'print_layer'
]
......@@ -70,7 +70,8 @@ class LayerType(object):
POOLING_AVG = 'average'
FC_LAYER = "fc"
COST = 'cost'
COSINE_SIM = 'cos_vm'
COSINE_SIM_VEC = 'cos_vm'
COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv"
POOL_LAYER = "pool"
......@@ -91,6 +92,7 @@ class LayerType(object):
POWER_LAYER = 'power'
SCALING_LAYER = 'scaling'
TRANS_LAYER = 'trans'
OUT_PROD_LAYER = 'out_prod'
MEMORY = 'memory'
MAXID_LAYER = 'maxid'
......@@ -102,9 +104,11 @@ class LayerType(object):
SEL_FC_LAYER = "selective_fc"
SAMPLING_ID_LAYER = "sampling_id"
SLOPE_INTERCEPT_LAYER = "slope_intercept"
CONVEX_COMBINATION_LAYER = "convex_comb"
LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand"
PRINT_LAYER = "print"
CTC_LAYER = "ctc"
CRF_LAYER = "crf"
CRF_DECODING_LAYER = "crf_decoding"
......@@ -171,6 +175,8 @@ class LayerOutput(object):
assert LayerType.is_layer_type(layer_type)
self.name = name
self.layer_type = layer_type
if parents is not None and type(parents) != list:
parents = [parents]
self.parents = [] if parents is None else parents
self.activation = activation
self.num_filters = num_filters
......@@ -197,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT = 'drop_rate'
def check_input(input):
    """
    Normalize a layer input into a list of LayerOutput.

    A single LayerOutput is wrapped into a one-element list. A list or tuple
    is validated element by element and returned as a new list.

    :param input: The input layer. Could be a list/tuple of input layer.
    :type input: LayerOutput|list|tuple
    :return: list of LayerOutput
    :rtype: list of LayerOutput
    :raise AssertionError: if input is not a LayerOutput, list or tuple, or if
                           any element of the sequence is not a LayerOutput.
    """
    if isinstance(input, LayerOutput):
        # Wrap the instance that was passed in, not the LayerOutput class.
        return [input]
    # Tuples are accepted as well, as promised by the documented type.
    assert isinstance(input, (list, tuple))
    for inp in input:
        assert isinstance(inp, LayerOutput)
    # Always hand back a fresh list so callers may mutate it freely.
    return list(input)
def layer_support(*attrs):
def decorator(method):
@functools.wraps(method)
......@@ -512,7 +537,7 @@ class MixedLayerType(LayerOutput):
:rtype: MixedLayerType
"""
if not self.finalized:
assert isinstance(other, Projection)
assert isinstance(other, Projection) or isinstance(other, Operator)
self.inputs.append(other)
self.parents.append(other.origin)
return self
......@@ -725,6 +750,27 @@ def fc_layer(input, size, act=None, name=None,
size=size)
@wrap_name_default("print")
def print_layer(input, name=None):
    """
    Print the output value of input layers. This layer is useful for debugging.

    :param name: The Layer Name.
    :type name: basestring
    :param input: The input layer. Could be a list/tuple of input layer.
    :type input: LayerOutput|list|tuple
    :return: No return
    """
    # Normalize to a list before validation so that a lone LayerOutput or a
    # tuple (both documented as valid) can be iterated when building the
    # input name list below.
    if isinstance(input, LayerOutput):
        input = [input]
    elif isinstance(input, tuple):
        input = list(input)
    check_input(input)

    Layer(
        name=name,
        type=LayerType.PRINT_LAYER,
        inputs=[l.name for l in input],
    )
    # The LayerOutput is constructed but not returned, matching the documented
    # "No return" contract of this layer.
    LayerOutput(name, LayerType.PRINT_LAYER, input)
@wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
......@@ -1169,13 +1215,16 @@ def power_layer(input, weight, name=None, layer_attr=None):
@layer_support()
def scaling_layer(input, weight, name=None, layer_attr=None):
"""
A layer for each row of a matrix, multiplying with a element of a vector.
A layer for multiplying input vector by weight scalar.
.. math::
y.row[i] = w[i] * x.row[i]
y = w x
where :math:`x` is size=dataDim input, :math:`w` is size=1 weight,
and :math:`y` is size=dataDim output.
where :math:`x` is (batchSize x dataDim) input, :math:`w` is
(batchSize x 1) weight vector, and :math:`y` is (batchSize x dataDim) output.
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The example usage is:
......@@ -1249,11 +1298,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
.. math::
similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b}
\\over \\|\\mathbf{b}\\| \\|\\mathbf{b}\\|}
\\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|}
And the input dimension is :math:`a \in R^M`, :math:`b \in R^{MN}`. The
similarity will be calculated N times by step M. The output dimension is
:math:`R^N`. The scale will be multiplied to similarity.
The size of a is M, size of b is M*N,
Similarity will be calculated N times by step M. The output size is
N. The scale will be multiplied to similarity.
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
:param name: layer name
:type name: basestring
......@@ -1270,14 +1322,23 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(
name=name,
type=LayerType.COSINE_SIM,
size=size,
cos_scale=scale,
inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
if size == 1:
Layer(
name=name,
type=LayerType.COSINE_SIM,
cos_scale=scale,
inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
else:
Layer(
name=name,
type=LayerType.COSINE_SIM_VEC,
size=size,
cos_scale=scale,
inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b])
@wrap_name_default()
......@@ -2326,6 +2387,39 @@ def maxid_layer(input, name=None, layer_attr=None):
layer_type=LayerType.MAXID_LAYER,
parents=[input])
@wrap_name_default()
def out_prod_layer(input1, input2, name=None, layer_attr=None):
    """
    A layer for computing the outer product of two vectors
    The result is a matrix of size(input1) x size(input2)

    The example usage is:

    .. code-block:: python

       out_prod = out_prod_layer(input1=vec1, input2=vec2)

    :param name: Layer name.
    :type name: basestring
    :param input1: The first input layer.
    :type input1: LayerOutput
    :param input2: The second input layer.
    :type input2: LayerOutput
    :param layer_attr: extra layer attributes.
    :type layer_attr: ExtraLayerAttribute.
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input1, LayerOutput)
    assert isinstance(input2, LayerOutput)
    # Use the LayerType constant (same 'out_prod' value) so the config type
    # and the returned LayerOutput type cannot drift apart.
    Layer(name=name,
          type=LayerType.OUT_PROD_LAYER,
          inputs=[input1.name, input2.name],
          **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name=name,
                       layer_type=LayerType.OUT_PROD_LAYER,
                       parents=[input1, input2])
@wrap_name_default()
def eos_layer(input, eos_id, name=None, layer_attr=None):
......@@ -2909,29 +3003,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0):
@wrap_name_default()
def convex_comb_layer(input, size, name=None):
def linear_comb_layer(weights, vectors, size, name=None):
"""
A layer for convex weighted average of vectors takes two inputs.
- Input: a vector containing the convex weights (batchSize x weightdim),
and a matrix in a vector form (batchSize x (weightdim * datadim)).
- Output: a vector (batchSize * datadim).
A layer for weighted sum of vectors takes two inputs.
- Input: size of weights is M
size of vectors is M*N
- Output: a vector of size=N
.. math::
y[i][j] = \sum_{j}(x_{1}(i, j) * x_{2}(i,j + i * dataDim)),
z(i) = \sum_{j=0}^{M-1} x(j) y(i+Nj)
where :math:`0 \le i \le N-1`
Or in the matrix notation:
i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1)
.. math::
z = x^\mathrm{T} Y
In this formula:
- :math:`x_{1}`: the first input.
- :math:`x_{2}`: the second input.
- :math:`y`: the output.
- :math:`x`: weights
- :math:`y`: vectors.
- :math:`z`: the output.
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The simple usage is:
.. code-block:: python
convex_comb = convex_comb_layer(input=inputs,
linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
size=elem_dim)
:param input: The input layers.
......@@ -2944,15 +3046,16 @@ def convex_comb_layer(input, size, name=None):
:rtype: LayerOutput
"""
assert isinstance(input, list) or isinstance(input, tuple)
assert len(input) == 2
Layer(
name=name,
type=LayerType.CONVEX_COMBINATION_LAYER,
type=LayerType.LINEAR_COMBINATION_LAYER,
size=size,
inputs=[Input(input[0].name), Input(input[1].name)],
inputs=[Input(weights.name), Input(vectors.name)],
)
return LayerOutput(name, LayerType.CONVEX_COMBINATION_LAYER, input, size=size)
return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER,
[weights, vectors], size=size)
convex_comb_layer = linear_comb_layer
@wrap_name_default()
def block_expand_layer(input,
......@@ -3036,6 +3139,17 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
classification task. That is, for sequence labeling problems where the
alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
Note:
Considering the 'blank' label needed by CTC, you need to use
(num_classes + 1) as the input size. num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer, such as
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1.
The simple usage:
.. code-block:: python
......@@ -3049,7 +3163,7 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
:type input: LayerOutput
:param label: The data layer of label with variable length.
:type label: LayerOutput
:param size: category numbers.
:param size: category numbers + 1.
:type size: int
:param name: The name of this layer, which can not specify.
:type name: string|None
......
#################### test_config_parser #########################
# Register the test case "layers_test": it runs layers_test.py through the
# .set_python_path.sh wrapper with ${PROJ_ROOT}/python/paddle as the working
# directory, which is what the relative config path inside layers_test.py
# resolves against.
# NOTE(review): "-d ${PROJ_ROOT}/python/" presumably adds that directory to
# the Python search path -- confirm against .set_python_path.sh.
add_test(NAME layers_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import parse_config_and_serialize
if __name__ == '__main__':
    # The config path is relative, so this script has to be started from the
    # directory that contains trainer_config_helpers/.
    config_file = 'trainer_config_helpers/tests/layers_test_config.py'
    # Second argument is passed as an empty string, exactly as before.
    parse_config_and_serialize(config_file, '')
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# Network configuration that exercises several layer helpers in one graph:
# out_prod_layer, cos_sim (with the default size and with size=3),
# linear_comb_layer, print_layer and ctc_layer.
# NOTE(review): presumably this is the layers_test_config.py parsed by
# layers_test.py -- confirm the file name.
num_classes = 5
# Two raw data inputs of size 3 and 5.
x = data_layer(name="input1", size=3)
y = data_layer(name="input2", size=5)
# Outer product of the two raw inputs.
z = out_prod_layer(input1=x, input2=y)
# Project the inputs so that x1/y1 have equal size (5) and y2 has size
# 15 = 5 * 3, as needed by the size=3 layers below.
x1 = fc_layer(input=x, size=5)
y1 = fc_layer(input=y, size=5)
y2 = fc_layer(input=y, size=15)
# cos_sim with the default size (both inputs have size 5).
cos1 = cos_sim(a=x1, b=y1)
# cos_sim with size=3: a has size 5 and b has size 5 * 3.
cos3 = cos_sim(a=x1, b=y2, size=3)
# Weighted sum: weights of size 5, vectors of size 5 * 3, output size 3.
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
# Softmax classifier fed by all of the layers under test.
out = fc_layer(input=[cos1, cos3, linear_comb, z],
size=num_classes,
act=SoftmaxActivation())
# print_layer is called with a one-element list here.
print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
# CTC branch: both the softmax layer feeding ctc_layer and ctc_layer itself
# use num_classes + 1 as their size.
tmp = fc_layer(input=x1,
size=num_classes + 1,
act=SoftmaxActivation())
ctc = ctc_layer(input=tmp,
label=y,
size=num_classes + 1)
ctc_eval = ctc_error_evaluator(input=tmp, label=y)
# Optimizer settings passed to settings() for this test config.
settings(
batch_size=10,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册