Commit 92ca98d5 authored by liaogang

Merge remote-tracking branch 'upstream/master'

...@@ -2,6 +2,9 @@ language: cpp ...@@ -2,6 +2,9 @@ language: cpp
cache: ccache cache: ccache
sudo: required sudo: required
dist: trusty dist: trusty
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
addons: addons:
apt: apt:
packages: packages:
...@@ -16,6 +19,7 @@ addons: ...@@ -16,6 +19,7 @@ addons:
- python2.7-dev - python2.7-dev
- m4 - m4
- libprotobuf-dev - libprotobuf-dev
- doxygen
- protobuf-compiler - protobuf-compiler
- python-protobuf - python-protobuf
- python-numpy - python-numpy
...@@ -24,12 +28,10 @@ addons: ...@@ -24,12 +28,10 @@ addons:
- libgflags-dev - libgflags-dev
- libgtest-dev - libgtest-dev
before_install: before_install:
- pip install wheel protobuf - pip install wheel protobuf sphinx breathe recommonmark
- sudo paddle/scripts/travis/before_install.sh - sudo paddle/scripts/travis/before_install.sh
script: script:
- paddle/scripts/travis/build.sh - paddle/scripts/travis/main.sh
- paddle/scripts/travis/unittest.sh
- paddle/scripts/travis/make_install.sh
notifications: notifications:
email: email:
on_success: change on_success: change
......
...@@ -25,7 +25,7 @@ repo or just head straight to the command line: ...@@ -25,7 +25,7 @@ repo or just head straight to the command line:
```shell ```shell
# Clone your fork to your local machine # Clone your fork to your local machine
git clone git@github.com:USERNAME/paddle.git git clone git@github.com:USERNAME/Paddle.git
``` ```
Then you can start to develop. Then you can start to develop.
...@@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first: ...@@ -52,7 +52,7 @@ To do this, you'll need to add a remote at first:
# see the current configured remote repository # see the current configured remote repository
git remote -v git remote -v
# add upstream repository # add upstream repository
git remote add upstream https://github.com/paddle/paddle.git git remote add upstream https://github.com/baidu/Paddle.git
# verify the new upstream # verify the new upstream
git remote -v git remote -v
``` ```
......
...@@ -9,6 +9,7 @@ Install PaddlePaddle ...@@ -9,6 +9,7 @@ Install PaddlePaddle
:glob: :glob:
install_* install_*
internal/install_from_jumbo.md
Build from Source Build from Source
----------------- -----------------
......
...@@ -5,3 +5,4 @@ Cluster Train ...@@ -5,3 +5,4 @@ Cluster Train
:glob: :glob:
opensource/cluster_train.md opensource/cluster_train.md
internal/index.md
...@@ -245,10 +245,10 @@ addto_layer ...@@ -245,10 +245,10 @@ addto_layer
:members: addto_layer :members: addto_layer
:noindex: :noindex:
convex_comb_layer linear_comb_layer
----------------- -----------------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
:members: convex_comb_layer :members: linear_comb_layer
:noindex: :noindex:
interpolation_layer interpolation_layer
...@@ -280,7 +280,13 @@ tensor_layer ...@@ -280,7 +280,13 @@ tensor_layer
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
:members: tensor_layer :members: tensor_layer
:noindex: :noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
trans_layer trans_layer
------------ ------------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
...@@ -341,12 +347,6 @@ rank_cost ...@@ -341,12 +347,6 @@ rank_cost
:members: rank_cost :members: rank_cost
:noindex: :noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
crf_layer crf_layer
----------------- -----------------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
......
...@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc ...@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
:glob:
源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst install/docker_install.rst
install/ubuntu_install.rst install/ubuntu_install.rst
cmake/index.rst cmake/index.rst
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
...@@ -8,7 +8,7 @@ PaddlePaddle文档 ...@@ -8,7 +8,7 @@ PaddlePaddle文档
* `用户接口 <ui/index.html>`_ * `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_ * `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_ * `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <../doc/cluster/index.html>`_ * `集群训练 <cluster/index.html>`_
开发指南 开发指南
-------- --------
......
...@@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) ...@@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R4: // APIs available after R4:
#if CUDNN_VERSION >= 4000 #if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \ __macro(cudnnBatchNormalizationForwardTraining) \
__macro(cudnnBatchNormalizationForwardInference) \ __macro(cudnnBatchNormalizationForwardInference) \
...@@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, ...@@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
double epsilon, double epsilon,
real *savedMean, real *savedMean,
real *savedVar) { real *savedVar) {
#if CUDNN_VERSION >= 4000 #if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) || if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) { (NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL " LOG(FATAL) << "runningMean and runningInvVar can be NULL "
...@@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, ...@@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_training failed"); CHECK_SYNC("hl_batch_norm_forward_training failed");
#else #else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version; << "But cudnn lib version is " << g_cudnn_lib_version;
#endif #endif
} }
...@@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *estimatedMean, real *estimatedMean,
real *estimatedInvVar, real *estimatedInvVar,
double epsilon) { double epsilon) {
#if CUDNN_VERSION >= 4000 #if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc);
...@@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_inference failed"); CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else #else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version; << "But cudnn lib version is " << g_cudnn_lib_version;
#endif #endif
} }
...@@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, ...@@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
double epsilon, double epsilon,
real *savedMean, real *savedMean,
real *savedInvVar) { real *savedInvVar) {
#if CUDNN_VERSION >= 4000 #if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) || if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) { (NULL == savedMean && NULL != savedInvVar)) {
LOG(FATAL) << "savedMean and savedVar can be NULL " LOG(FATAL) << "savedMean and savedVar can be NULL "
...@@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, ...@@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward( CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
t_resource.cudnn_handle, mode, &alpha, &beta, t_resource.cudnn_handle, mode, &alpha, &beta,
#if CUDNN_VERSION >= 5000
&alpha, &beta, &alpha, &beta,
#endif
xDesc, input, dyDesc, outGrad, dxDesc, inGrad, xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
bnDesc, scale, scaleGrad, biasGrad, epsilon, bnDesc, scale, scaleGrad, biasGrad, epsilon,
savedMean, savedInvVar)); savedMean, savedInvVar));
CHECK_SYNC("hl_batch_norm_backward failed"); CHECK_SYNC("hl_batch_norm_backward failed");
#else #else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version; << "But cudnn lib version is " << g_cudnn_lib_version;
#endif #endif
} }
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh" #include "hl_matrix_apply.cuh"
#include "hl_sequence.h" #include "hl_sequence.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP(Zero, a = 0); DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b); DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);
......
...@@ -194,8 +194,8 @@ public: ...@@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2); CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label; Argument output, label;
output.resizeAndCopyFrom(arguments[0], false); output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false); label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT); hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions); CHECK(label.sequenceStartPositions);
CHECK(label.ids); CHECK(label.ids);
...@@ -207,7 +207,7 @@ public: ...@@ -207,7 +207,7 @@ public:
real err = 0; real err = 0;
err = editDistance( err = editDistance(
output.value->getData() + output.value->getWidth() * outputStarts[i], output.value->getData() + output.value->getWidth() * outputStarts[i],
output.value->getHeight(), output.value->getWidth(), outputStarts[i+1] - outputStarts[i], output.value->getWidth(),
label.ids->getData() + labelStarts[i], label.ids->getData() + labelStarts[i],
labelStarts[i + 1] - labelStarts[i]); labelStarts[i + 1] - labelStarts[i]);
...@@ -224,6 +224,9 @@ public: ...@@ -224,6 +224,9 @@ public:
for (const std::string& name : config_.input_layers()) { for (const std::string& name : config_.input_layers()) {
arguments.push_back(nn.getLayer(name)->getOutput()); arguments.push_back(nn.getLayer(name)->getOutput());
} }
}
virtual void updateSamplesNum(const std::vector<Argument>& arguments) {
numSequences_ += arguments[1].getNumSequences(); numSequences_ += arguments[1].getNumSequences();
} }
......
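The hunk above slices the output matrix per sequence (using `outputStarts[i+1] - outputStarts[i]` frames instead of the whole matrix height) before computing an edit distance against that sequence's label ids. Below is a minimal sketch of that per-sequence aggregation, assuming a best-path (argmax) decode followed by a plain Levenshtein distance stands in for Paddle's `editDistance`; the function names and the normalization are illustrative, not the evaluator's exact code:

```python
import numpy as np

def levenshtein(a, b):
    """Plain edit distance between two label sequences."""
    dp = np.arange(len(b) + 1)
    for i, x in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, y in enumerate(b, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1, prev + (x != y))
    return int(dp[len(b)])

def ctc_error(output, output_starts, labels, label_starts, blank=0):
    """Average per-sequence edit distance, normalized by label length."""
    total, num_seqs = 0.0, len(output_starts) - 1
    for i in range(num_seqs):
        frames = output[output_starts[i]:output_starts[i + 1]]  # (T_i, num_classes)
        best_path = frames.argmax(axis=1)
        # best-path decode: collapse repeated symbols, then drop the blank
        decoded, prev_sym = [], -1
        for sym in best_path:
            if sym != prev_sym and sym != blank:
                decoded.append(int(sym))
            prev_sym = sym
        ref = list(labels[label_starts[i]:label_starts[i + 1]])
        total += levenshtein(decoded, ref) / max(len(ref), 1)
    return total / num_seqs
```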
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "GradientMachine.h" #include "GradientMachine.h"
...@@ -101,7 +100,7 @@ public: ...@@ -101,7 +100,7 @@ public:
* Return true if this prefix or candidate is expected to be dropped. * Return true if this prefix or candidate is expected to be dropped.
*/ */
typedef std::function<bool(int seqId, const std::vector<int>&, typedef std::function<bool(int seqId, const std::vector<int>&,
const std::vector<real>&)> DropCallback; const std::vector<real>&)> DropCallback;
/** /**
* @brief NormOrDropNodeCallback * @brief NormOrDropNodeCallback
...@@ -117,7 +116,7 @@ public: ...@@ -117,7 +116,7 @@ public:
* The fourth parameter is the probability of the whole path. * The fourth parameter is the probability of the whole path.
*/ */
typedef std::function<void(int seqId, const std::vector<int>&, typedef std::function<void(int seqId, const std::vector<int>&,
std::vector<real>&, real*)> NormOrDropNodeCallback; std::vector<real>&, real*)> NormOrDropNodeCallback;
/** /**
* @brief Register beam search control callbacks. Used for prediction. * @brief Register beam search control callbacks. Used for prediction.
...@@ -192,7 +191,7 @@ public: ...@@ -192,7 +191,7 @@ public:
int machineId; // index of sample in frame int machineId; // index of sample in frame
int topIndex; // index of MaxIdLayer output in one sample int topIndex; // index of MaxIdLayer output in one sample
int seqId; // index of sequence in batch generation int seqId; // index of sequence in batch generation
std::vector<int> machineIdVec; std::vector<int> machineIdVec;
/** /**
...@@ -206,7 +205,10 @@ public: ...@@ -206,7 +205,10 @@ public:
/** /**
* @brief Path default ctor, first logProb is 0. * @brief Path default ctor, first logProb is 0.
*/ */
Path() { logProb = 0; seqId = 0; } Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; } explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/** /**
...@@ -319,21 +321,33 @@ protected: ...@@ -319,21 +321,33 @@ protected:
}; };
std::vector<MemoryFrameLine> memoryFrameLines_; std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows. // Each inFrameLines(inlinks) has its own info(elements) below,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info { struct Info {
IVectorPtr allIds; // scattered id of realLayer IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds std::vector<int> idIndex; // index of allIds
ICpuGpuVectorPtr ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
}; };
Info info_; std::vector<Info> info_;
// numSeqs_[i] is the number of sequences that are longer than i (for sequence
// data) or have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex) std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
void createInFrameInfo(const Argument& input, PassType passType); // the id of inlink which share info with outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines at one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType); PassType passType);
...@@ -363,6 +377,9 @@ protected: ...@@ -363,6 +377,9 @@ protected:
NeuralNetwork* rootNetwork_; NeuralNetwork* rootNetwork_;
bool reversed_; bool reversed_;
// if hasSubseq: max number of sentences (subseq) in batchSize samples
// else: max number of tokens in batchSize samples (sentences)
int maxSequenceLength_; int maxSequenceLength_;
bool useGpu_; bool useGpu_;
bool stopBeamSearch_; bool stopBeamSearch_;
...@@ -415,7 +432,7 @@ private: ...@@ -415,7 +432,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame * @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded. * that the generation process expanded.
*/ */
void createDataOutlink(std::vector<int> & machineIdVec); void createDataOutlink(std::vector<int>& machineIdVec);
/* /*
* @brief used in beam search, connect previous frame to form recurrent link * @brief used in beam search, connect previous frame to form recurrent link
......
...@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) { ...@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]); forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else { } else {
forwardImp(getInput(0), getInput(1)); forwardImp(getInput(0), getInput(1));
...@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) { ...@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) { if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]); backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)). const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1); resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)). const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1); resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else { } else {
backwardImp(callback, getInput(0), getInput(1)); backwardImp(callback, getInput(0), getInput(1));
} }
......
...@@ -248,7 +248,7 @@ void ConvOperator::forward() { ...@@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize); CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value); checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize, Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_); outputH_ * outputW_ * numFilters_, false, useGpu_);
{ {
AsyncGpuBlock block; AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) { for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
...@@ -21,18 +21,20 @@ limitations under the License. */ ...@@ -21,18 +21,20 @@ limitations under the License. */
namespace paddle { namespace paddle {
/** /**
* @brief A layer for convex weighted average of vectors, * @brief A layer for weighted sum of vectors,
* which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND * which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND
* TRANSLATE * TRANSLATE
* - Input: the first input contains the convex weights (batchSize x weightDim), * - Input: the size of the first input is weightDim,
* and the shape of second input is (batchSize x (weightdim*dataDim)). * and the size of the second input is weightDim * dataDim.
* - Output: the shape of output is (batchSize x dataDim). * - Output: the size of the output is dataDim.
* \f[ * \f[
* out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)), * out(j) = \sum_{i}(in0(i) * in1(i,j + i * dataDim)),
* i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1) * i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1)
* \f] * \f]
* Note that the above computation is for one sample. Multiple samples are
* processed in one batch.
* *
* The config file api is convex_comb_layer. * The config file api is linear_comb_layer.
*/ */
class ConvexCombinationLayer : public Layer { class ConvexCombinationLayer : public Layer {
protected: protected:
......
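The formula in the rewritten comment is easy to check numerically. Here is a small NumPy sketch of the per-sample weighted combination under the sizes stated above (first input of length weightDim, second input of length weightDim * dataDim); the variable names are illustrative only:

```python
import numpy as np

weight_dim, data_dim = 3, 4
w = np.random.rand(weight_dim)              # first input: the combination weights
x = np.random.rand(weight_dim * data_dim)   # second input: weightDim vectors of length dataDim, flattened

# out(j) = sum_i w(i) * x(j + i * dataDim): a weighted sum of the sub-vectors of x
out = w @ x.reshape(weight_dim, data_dim)
assert out.shape == (data_dim,)

# batched form (one row per sample), matching "multiple samples in one batch"
batch = 2
W = np.random.rand(batch, weight_dim)
X = np.random.rand(batch, weight_dim * data_dim)
Out = np.einsum('bi,bij->bj', W, X.reshape(batch, weight_dim, data_dim))
```

This is also why the config check added to ConvexCombinationLayer later in this diff asserts size * input(0).size == input(1).size.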
...@@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) { ...@@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) {
REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
MatrixPtr prevOut1 = getInputValue(0); MatrixPtr prevOut1 = getInputValue(0);
MatrixPtr prevOut2 = getInputValue(1); MatrixPtr prevOut2 = getInputValue(1);
outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_); outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
} }
} }
...@@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) { ...@@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) {
outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0), outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0),
*getInputValue(1), *getInputGrad(0), *getInputValue(1), *getInputGrad(0),
*getInputGrad(1), kCosSimScale_); *getInputGrad(1), config_.cos_scale());
} }
} }
......
...@@ -36,7 +36,7 @@ namespace paddle { ...@@ -36,7 +36,7 @@ namespace paddle {
class CosSimLayer : public Layer { class CosSimLayer : public Layer {
public: public:
explicit CosSimLayer(const LayerConfig& config) explicit CosSimLayer(const LayerConfig& config)
: Layer(config), kCosSimScale_(5.0f) {} : Layer(config) {}
~CosSimLayer() {} ~CosSimLayer() {}
...@@ -44,8 +44,6 @@ public: ...@@ -44,8 +44,6 @@ public:
void forward(PassType passType); void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr); void backward(const UpdateCallback& callback = nullptr);
const real kCosSimScale_;
}; };
} // namespace paddle } // namespace paddle
...@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label, ...@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) { Matrix &cost) {
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
} }
forwardImpIn(output, label, cost); forwardImpIn(output, label, cost);
} }
......
...@@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { ...@@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
create(tmpBiasGrad_, 1, channels_, &betaGrad); create(tmpBiasGrad_, 1, channels_, &betaGrad);
} }
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad()) {
create(tmpWGrad_, 1, channels_, &gammaGrad);
}
if (biases_ && biases_->getWGrad()) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}
}
hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad,
ioDesc_, inGrad, bnParamDesc_, ioDesc_, inGrad, bnParamDesc_,
gamma, gammaGrad, betaGrad, gamma, gammaGrad, betaGrad,
EPS, savedMean, savedInvVar); EPS, savedMean, savedInvVar);
// because of the different api of cudnn v4 and v5.
if (hl_get_cudnn_lib_version() < 5000) {
if (weight_->getWGrad() && biases_->getWGrad()) {
weight_->getWGrad()->add(*tmpWGrad_);
biases_->getWGrad()->add(*tmpBiasGrad_);
}
}
{ {
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
class PrintLayer : public Layer {
public:
explicit PrintLayer(const LayerConfig& config)
: Layer(config) {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {}
};
void PrintLayer::forward(PassType passType) {
Layer::forward(passType);
for (size_t i = 0; i != inputLayers_.size(); ++i) {
const auto& argu = getInput(i);
const std::string& name = inputLayers_[i]->getName();
if (argu.value) {
std::ostringstream os;
argu.value->print(os);
LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str();
}
if (argu.ids) {
std::ostringstream os;
argu.ids->print(os, argu.ids->getSize());
LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str();
}
if (auto startPos = argu.sequenceStartPositions) {
std::ostringstream os;
startPos->getVector(false)->print(os, startPos->getSize());
LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str();
}
if (auto subStartPos = argu.subSequenceStartPositions) {
std::ostringstream os;
subStartPos->getVector(false)->print(os, subStartPos->getSize());
LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n"
<< os.str();
}
}
}
REGISTER_LAYER(print, PrintLayer);
} // namespace paddle
...@@ -52,8 +52,10 @@ public: ...@@ -52,8 +52,10 @@ public:
Layer::forward(passType); Layer::forward(passType);
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]); forwardImp(tmpCpuInput_[0]);
} else { } else {
forwardImp(getInput(0)); forwardImp(getInput(0));
......
...@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers, ...@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST); testLayer->forward(PASS_TEST);
Argument out; Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) { if (batchOut.value) {
size_t dim = batchOut.value->getWidth(); size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value); ASSERT_TRUE((bool)out.value);
...@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers, ...@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST); testLayer->forward(PASS_TEST);
Argument out; Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) { if (batchOut.value) {
size_t dim = batchOut.value->getWidth(); size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value); ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
integer_value(2)])
def process_subseq(settings, file_name):
for d in data:
yield d
@provider(input_types=[integer_value_sequence(10),
integer_value(2)])
def process_seq(settings, file_name):
for d in data:
seq = []
for subseq in d[0]:
seq += subseq
yield seq, d[1]
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(y):
inner_mem = memory(name="inner_rnn_state",
size=hidden_dim,
boot_layer=outer_mem)
out = fc_layer(input=[y, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="inner_rnn_state")
return out
inner_rnn_output = recurrent_group(
step=inner_step,
name="inner",
input=x)
last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it correctly. Current implementation requires that
# all the out links are from sequences. However, it does not report an error
# when the out links are not sequences.
return inner_rnn_output
out = recurrent_group(
name="outer",
step=outer_step,
input=SubsequenceInput(emb))
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
mem = memory(name="rnn_state", size=hidden_dim)
out = fc_layer(input=[y, mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="rnn_state")
return out
out = recurrent_group(
name="rnn",
step=step,
input=emb)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
...@@ -87,18 +87,31 @@ void testEvaluator(TestConfig testConf, string testEvaluatorName, ...@@ -87,18 +87,31 @@ void testEvaluator(TestConfig testConf, string testEvaluatorName,
return; return;
} }
ICpuGpuVectorPtr sequenceStartPositions;
if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL) {
if (!sequenceStartPositions) {
generateSequenceStartPositions(batchSize, sequenceStartPositions);
}
data.sequenceStartPositions = sequenceStartPositions;
}
arguments.push_back(data); arguments.push_back(data);
} }
Evaluator* testEvaluator = Evaluator::create(testConf.evaluatorConfig); Evaluator* testEvaluator = Evaluator::create(testConf.evaluatorConfig);
double totalScore = 0.0; double totalScore = 0.0;
testEvaluator->start();
totalScore += testEvaluator->evalImp(arguments); totalScore += testEvaluator->evalImp(arguments);
testEvaluator->updateSamplesNum(arguments); testEvaluator->updateSamplesNum(arguments);
testEvaluator->finish();
LOG(INFO) << *testEvaluator; LOG(INFO) << *testEvaluator;
double totalScore2 = 0.0; double totalScore2 = 0.0;
if (testConf.testAccumulate) { if (testConf.testAccumulate) {
testEvaluator->start();
totalScore2 += testEvaluator->evalImp(arguments); totalScore2 += testEvaluator->evalImp(arguments);
testEvaluator->finish();
EXPECT_LE(fabs(totalScore - totalScore2), 1.0e-5); EXPECT_LE(fabs(totalScore - totalScore2), 1.0e-5);
} }
} }
...@@ -202,6 +215,15 @@ TEST(Evaluator, precision_recall) { ...@@ -202,6 +215,15 @@ TEST(Evaluator, precision_recall) {
false); false);
} }
TEST(Evaluator, ctc_error_evaluator) {
TestConfig config;
config.evaluatorConfig.set_type("ctc_edit_distance");
config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "output", 32});
config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "label", 1});
testEvaluatorAll(config, "ctc_error_evaluator", 100);
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
initMain(argc, argv); initMain(argc, argv);
FLAGS_thread_local_rand_use_global_seed = true; FLAGS_thread_local_rand_use_global_seed = true;
......
...@@ -21,6 +21,8 @@ limitations under the License. */ ...@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h> #include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h> #include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
using namespace std; // NOLINT using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer { class TrainerForTest : public paddle::Trainer {
...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim); CpuVector vecMomentum(dim);
// vecW needs to be assigned, otherwise the variable is an uncertain value. // vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain(); trainer.startTrain();
for (int i = 0; i < num_passes; ++i) { for (int i = 0; i < num_passes; ++i) {
...@@ -88,27 +92,39 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -88,27 +92,39 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str()); rmDir(dir.c_str());
} }
TEST(RecurrentGradientMachine, HasSubSequence) { void test(const string& conf1, const string& conf2, double eps) {
int num_passes = 5; int num_passes = 5;
real* cost1 = new real[num_passes]; real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1"; const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes); CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes]; real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2"; const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes); CalCost(conf2, dir2, cost2, num_passes);
for (int i = 0; i < num_passes; i++) { for (int i = 0; i < num_passes; i++) {
LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i] LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i]
<< ", cost2=" << cost2[i]; << ", cost2=" << cost2[i]
ASSERT_NEAR(cost1[i], cost2[i], 1e-3); << ", diff=" << std::abs(cost1[i] - cost2[i]);
ASSERT_NEAR(cost1[i], cost2[i], eps);
} }
delete[] cost1; delete[] cost1;
delete[] cost2; delete[] cost2;
} }
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf",
1e-5);
}
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf",
0);
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) { if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) { if (!paddle::version::isWithGpu()) {
......
...@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize, ...@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
Argument& cpuInput = testCpu.dataLayer_->getOutput(); Argument& cpuInput = testCpu.dataLayer_->getOutput();
Argument& gpuInput = testGpu.dataLayer_->getOutput(); Argument& gpuInput = testGpu.dataLayer_->getOutput();
gpuInput.resizeAndCopyFrom(cpuInput, true); gpuInput.resizeAndCopyFrom(cpuInput, true);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE); const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE); const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
...@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width, ...@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
if (!matrix) { if (!matrix) {
matrix = Matrix::create(height, width, trans, useGpu); matrix = Matrix::create(height, width, trans, useGpu);
} else { } else {
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width); matrix->resize(height, width);
} }
} }
...@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height, ...@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
} else { } else {
CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) || CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
dynamic_cast<GpuSparseMatrix*>(matrix.get())); dynamic_cast<GpuSparseMatrix*>(matrix.get()));
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width, nnz, valueType, format); matrix->resize(height, width, nnz, valueType, format);
} }
} }
......
...@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) { ...@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
} else if ((!useGpu) && (!cpuVectorT_)) { } else if ((!useGpu) && (!cpuVectorT_)) {
cpuVectorT_ = VectorT<T>::create(size, false); cpuVectorT_ = VectorT<T>::create(size, false);
} else { } else {
CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
this->resize(size, useGpu); this->resize(size, useGpu);
} }
} }
......
...@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu, ...@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
if (!dest) { if (!dest) {
dest = src->clone(0, 0, useGpu); dest = src->clone(0, 0, useGpu);
} else { } else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(src->getHeight(), src->getWidth()); dest->resize(src->getHeight(), src->getWidth());
} }
dest->copyFrom(*src, stream); dest->copyFrom(*src, stream);
...@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, ...@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
hl_stream_t stream = HPPL_STREAM_DEFAULT) { hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) { if (src) {
CHECK_LE((size_t)startRow + copySize, src->getHeight()); CHECK_LE((size_t)startRow + copySize, src->getHeight());
int height = copySize; int height = copySize;
int width = src->getWidth(); int width = src->getWidth();
if (!dest) { if (!dest) {
dest = src->clone(height, width, useGpu); dest = src->clone(height, width, useGpu);
} else { } else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(height, width); dest->resize(height, width);
} }
MatrixPtr submat = src->subMatrix(startRow, copySize); MatrixPtr submat = src->subMatrix(startRow, copySize);
...@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src, ...@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
} }
} }
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu, void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
hl_stream_t stream) { hl_stream_t stream) {
dataId = src.dataId; dataId = src.dataId;
...@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu, ...@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
resizeAndCopy(strs, src.strs, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream);
} }
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu) {
int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return size;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq, int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu, int32_t copySize, bool useGpu,
hl_stream_t stream) { hl_stream_t stream) {
...@@ -463,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus, ...@@ -463,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
} }
} }
void Argument::getSeqLengthAndStart( void Argument::getSeqInfo(std::vector<SeqInfo>* seqInfo) const {
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
int* maxSequenceLength) const {
const int* starts = sequenceStartPositions->getData(false); const int* starts = sequenceStartPositions->getData(false);
if (hasSubseq()) { const int* subStarts = hasSubseq()
size_t numSubSequences = getNumSubSequences(); ? subSequenceStartPositions->getData(false) : nullptr;
(*seqLengthAndStart).reserve(numSubSequences); size_t numSequences = getNumSequences();
const int* subStarts = subSequenceStartPositions->getData(false); seqInfo->reserve(numSequences);
int seqIndex = 0; int subSeqEnd = 0;
int subSeqIndex = 0; for (size_t i = 0; i < numSequences; ++i) {
*maxSequenceLength = 0; SeqInfo info;
for (size_t i = 0; i < numSubSequences; ++i) { info.seqStart = starts[i];
if (subStarts[i] == starts[seqIndex]) { info.subLevelLength = starts[i + 1] - starts[i];
subSeqIndex = 0; info.seqId = i;
(*seqLengthAndStart) if (hasSubseq()) {
.push_back(std::make_tuple<int, int, int, int>( info.subSeqStart = subSeqEnd;
subStarts[i + 1] - subStarts[i], (int)subStarts[i], while (subStarts[subSeqEnd] < starts[i + 1]) {
(int)seqIndex, (int)subSeqIndex)); ++subSeqEnd;
++subSeqIndex;
++seqIndex;
} else if (subStarts[i] < starts[seqIndex]) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
subStarts[i + 1] - subStarts[i], (int)subStarts[i],
(int)seqIndex - 1, (int)subSeqIndex));
++subSeqIndex;
} }
// maxSequenceLength_ = 1 + max(subSeqIndex) in each Seq. info.topLevelLength = subSeqEnd - info.subSeqStart;
if (*maxSequenceLength < std::get<3>((*seqLengthAndStart)[i])) } else {
*maxSequenceLength = std::get<3>((*seqLengthAndStart)[i]); info.topLevelLength = info.subLevelLength;
} info.subSeqStart = 0; // not used
*maxSequenceLength += 1;
} else {
size_t numSequences = getNumSequences();
(*seqLengthAndStart).reserve(numSequences);
for (size_t i = 0; i < numSequences; ++i) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
starts[i + 1] - starts[i], (int)starts[i], (int)i, (int)i));
} }
std::sort((*seqLengthAndStart).begin(), (*seqLengthAndStart).end(), seqInfo->push_back(info);
std::greater<std::tuple<int, int, int, int>>());
*maxSequenceLength = std::get<0>((*seqLengthAndStart)[0]);
} }
std::sort(seqInfo->begin(), seqInfo->end(),
[](const SeqInfo& a, const SeqInfo& b) {
return a.topLevelLength > b.topLevelLength;
});
} }
void Argument::checkSubset() const { void Argument::checkSubset() const {
......
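To make the new bookkeeping concrete, here is a hypothetical Python transcription of the getSeqInfo loop above (not Paddle API); the example start positions are made up:

```python
def get_seq_info(starts, sub_starts=None):
    """Mirror of Argument::getSeqInfo: one entry per top-level sequence."""
    infos, sub_seq_end = [], 0
    for i in range(len(starts) - 1):
        info = {"seqId": i,
                "seqStart": starts[i],
                "subLevelLength": starts[i + 1] - starts[i]}  # tokens in the sequence
        if sub_starts is not None:
            info["subSeqStart"] = sub_seq_end
            while sub_starts[sub_seq_end] < starts[i + 1]:
                sub_seq_end += 1
            info["topLevelLength"] = sub_seq_end - info["subSeqStart"]  # number of subsequences
        else:
            info["topLevelLength"] = info["subLevelLength"]
            info["subSeqStart"] = 0  # not used
        infos.append(info)
    # longest sequence (at the top level) first, like the std::sort above
    infos.sort(key=lambda x: x["topLevelLength"], reverse=True)
    return infos

# two sequences of 5 and 3 tokens; the first is split into subsequences [0,2) and [2,5)
print(get_seq_info([0, 5, 8], sub_starts=[0, 2, 5, 8]))
```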
...@@ -203,13 +203,28 @@ struct Argument { ...@@ -203,13 +203,28 @@ struct Argument {
* startSeq: the sample id of start * startSeq: the sample id of start
* copySize: how many samples need to copy * copySize: how many samples need to copy
* return value: how many samples are copied * return value: how many samples are copied
* Note that when specifying the stream explicitly in this case,
* synchronize should also be called somewhere after this function
*/ */
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq, int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu, int32_t copySize, bool useGpu, hl_stream_t stream);
hl_stream_t stream = HPPL_STREAM_DEFAULT);
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu, /*
hl_stream_t stream = HPPL_STREAM_DEFAULT); * same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu);
void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
/*
* same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
/* /*
@brief Concatenate several arguments into one and put the result into it. @brief Concatenate several arguments into one and put the result into it.
...@@ -238,12 +253,29 @@ struct Argument { ...@@ -238,12 +253,29 @@ struct Argument {
static void splitByDataId(const std::vector<Argument>& argus, static void splitByDataId(const std::vector<Argument>& argus,
std::vector<std::vector<Argument>>* arguGroups); std::vector<std::vector<Argument>>* arguGroups);
struct SeqInfo {
// Equal to sequence length for sequence data
// Equal to number of subsequences for subsequence data
int topLevelLength;
int seqStart;
int seqId;
// Equal to topLevelLength for sequence data
// Equal to sum of the length of subsequences for subsequence data
int subLevelLength;
// Only used for subsequence data, start position of this sequence
// is subSequenceStartPositions, i.e.
// subSequenceStartPositions[subSeqStart] == seqStart
int subSeqStart;
};
/* /*
Get Sequence Length, startPositions and max Length according to input Get SeqInfo for each sequence of this argument
*/ Elements in *seqInfo are sorted by topLevelLength in descending order
void getSeqLengthAndStart( */
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart, void getSeqInfo(std::vector<SeqInfo>* segInfo) const;
int* maxSequenceLength) const;
/* /*
Check Whether sequenceStartPositions is subset of Check Whether sequenceStartPositions is subset of
subSequenceStartPositions. subSequenceStartPositions.
......
#!/bin/bash #!/bin/bash
cd `dirname $0`
source ./common.sh source ./common.sh
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON
make -j `nproc` make -j `nproc`
env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j `nproc`"
sudo make install
sudo paddle version
#!/bin/bash
# Add set -e, cd to directory.
source ./common.sh
# Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
make paddle_docs paddle_docs_cn
# Parse Github URL
REPO=`git config remote.origin.url`
SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:}
SHA=`git rev-parse --verify HEAD`
# Documentation branch name
# gh-pages branch is used for PaddlePaddle.org. The English version of
# documentation in `doc` directory, and the chinese version in `doc_cn`
# directory.
TARGET_BRANCH="gh-pages"
# Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master"
# If is not a Github pull request, and in master branch.
if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
exit 0
fi
# Clone the repo to output directory
git clone $REPO output
cd output
# checkout github page branch
git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
# remove old docs. mv new docs.
rm -rf doc doc_cn
mv ../doc_cn/html doc_cn
mv ../doc/html doc
# Check is there anything changed.
set +e
git diff --exit-code >/dev/null
if [ $? -eq 0 ]; then
echo "No changes to the output on this push; exiting."
exit 0
fi
set -e
# Commit
git add .
git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}"
# Set ssh private key
openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key
eval `ssh-agent -s`
ssh-add deploy_key
# Push
git push $SSH_REPO $TARGET_BRANCH
#!/bin/bash
cd `dirname $0`
if [ ${JOB} == "BUILD_AND_TEST" ]; then
./build_and_test.sh
elif [ ${JOB} == "DOCS" ]; then
./docs.sh
else
echo Unknown job ${JOB}
exit 1
fi
#!/bin/bash
cd `dirname $0`
source ./common.sh
sudo make install
sudo paddle version
#!/bin/bash
cd `dirname $0`
source ./common.sh
env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j `nproc`"
...@@ -452,6 +452,9 @@ message SubModelConfig { ...@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10; repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11; optional GeneratorConfig generator = 11;
// the id of inlink which share info with outlinks, used in recurrent layer group
optional int32 target_inlinkid = 12;
} }
message ModelConfig { message ModelConfig {
......
...@@ -22,6 +22,8 @@ find_python_module(pip REQUIRED) ...@@ -22,6 +22,8 @@ find_python_module(pip REQUIRED)
find_python_module(wheel REQUIRED) find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED) find_python_module(google.protobuf REQUIRED)
add_subdirectory(paddle/trainer_config_helpers/tests)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
DESTINATION opt/paddle/share/wheels DESTINATION opt/paddle/share/wheels
) )
...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None): ...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func @config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name, def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed=False): seq_reversed=False,
target_inlinkname=""):
global g_current_submodel global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn", config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn") "RecurrentLayerGroup should be used only in recurrent_nn")
...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name) SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0 in_links_count = 0
for link in in_links: for linkid, link in enumerate(in_links):
if isinstance(link, basestring): if isinstance(link, basestring):
name = link name = link
has_subseq = False has_subseq = False
else: else:
name = link.link_name name = link.link_name
has_subseq = link.has_subseq has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0: if in_links_count == 0:
in_links_has_subseq = has_subseq in_links_has_subseq = has_subseq
else: else:
...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size) SequenceScatterAgentLayer(name=name, size=layer.size)
else: else:
ScatterAgentLayer(name=name, size=layer.size) ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add() pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name) pair.link_name = MakeLayerNameInSubmodel(name)
...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name, ...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links, in_links,
out_links, out_links,
generator=None, generator=None,
target_inlinkname="",
seq_reversed=False): seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed) seq_reversed,
target_inlinkname)
for link in out_links: for link in out_links:
RecurrentLayerGroupSetOutLink(link) RecurrentLayerGroupSetOutLink(link)
...@@ -1399,6 +1408,14 @@ class SelectiveFCLayer(LayerBase): ...@@ -1399,6 +1408,14 @@ class SelectiveFCLayer(LayerBase):
input_index, psize, dims, sparse, format) input_index, psize, dims, sparse, format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('print')
class PrintLayer(LayerBase):
def __init__(
self,
name,
inputs):
super(PrintLayer, self).__init__(name, 'print', 0, inputs)
@config_layer('data') @config_layer('data')
class DataLayer(LayerBase): class DataLayer(LayerBase):
def __init__( def __init__(
...@@ -1614,7 +1631,7 @@ class BatchNormLayer(LayerBase): ...@@ -1614,7 +1631,7 @@ class BatchNormLayer(LayerBase):
# Also based on cudnn version. # Also based on cudnn version.
use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \ use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \
((not parallel_nn) or self.config.device > -1) and \ ((not parallel_nn) or self.config.device > -1) and \
cudnn_version >= 4000 cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__(name, self.layer_type, 0, super(BatchNormLayer, self).__init__(name, self.layer_type, 0,
active_type=active_type, active_type=active_type,
...@@ -2264,6 +2281,9 @@ class ConvexCombinationLayer(LayerBase): ...@@ -2264,6 +2281,9 @@ class ConvexCombinationLayer(LayerBase):
name, 'convex_comb', size, inputs=inputs, device=device) name, 'convex_comb', size, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, config_assert(len(self.inputs) == 2,
'ConvexCombinationLayer must have 2 inputs') 'ConvexCombinationLayer must have 2 inputs')
config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for ConvexCombinationLayer')
self.set_layer_size(size) self.set_layer_size(size)
@config_layer('interpolation') @config_layer('interpolation')
...@@ -2313,6 +2333,9 @@ class CosSimVecMatLayer(LayerBase): ...@@ -2313,6 +2333,9 @@ class CosSimVecMatLayer(LayerBase):
self.config.cos_scale = cos_scale self.config.cos_scale = cos_scale
config_assert(len(self.inputs) == 2, config_assert(len(self.inputs) == 2,
'CosSimVecMatLayer must have 2 inputs') 'CosSimVecMatLayer must have 2 inputs')
config_assert(
size * self.get_input_layer(0).size == self.get_input_layer(1).size,
'Wrong input size for CosSimVecMatLayer')
@config_layer('sampling_id') @config_layer('sampling_id')
class SamplingIdLayer(LayerBase): class SamplingIdLayer(LayerBase):
...@@ -2361,6 +2384,7 @@ class CosSimLayer(LayerBase): ...@@ -2361,6 +2384,7 @@ class CosSimLayer(LayerBase):
self, self,
name, name,
inputs, inputs,
cos_scale=5,
device=None): device=None):
super(CosSimLayer, self).__init__( super(CosSimLayer, self).__init__(
name, 'cos', 1, inputs=inputs, device=device) name, 'cos', 1, inputs=inputs, device=device)
...@@ -2368,6 +2392,7 @@ class CosSimLayer(LayerBase): ...@@ -2368,6 +2392,7 @@ class CosSimLayer(LayerBase):
config_assert( config_assert(
self.get_input_layer(0).size == self.get_input_layer(1).size, self.get_input_layer(0).size == self.get_input_layer(1).size,
'inputs of CosSimLayer must have same dim') 'inputs of CosSimLayer must have same dim')
self.config.cos_scale = cos_scale
@config_layer('tensor') @config_layer('tensor')
......
...@@ -94,7 +94,7 @@ def evaluator_base( ...@@ -94,7 +94,7 @@ def evaluator_base(
Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval: Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval:
classification_error_evaluator=0.4486 classification_error_evaluator=0.4486
CurrentEval: ErrorRate=0.3964 CurrentEval: ErrorRate=0.3964
:param input: Input layers, an object of LayerOutput or a list of :param input: Input layers, an object of LayerOutput or a list of
LayerOutput. LayerOutput.
:type input: list|LayerOutput :type input: list|LayerOutput
...@@ -296,6 +296,7 @@ def precision_recall_evaluator( ...@@ -296,6 +296,7 @@ def precision_recall_evaluator(
@wrap_name_default() @wrap_name_default()
def ctc_error_evaluator( def ctc_error_evaluator(
input, input,
label,
name=None, name=None,
): ):
""" """
...@@ -305,16 +306,20 @@ def ctc_error_evaluator( ...@@ -305,16 +306,20 @@ def ctc_error_evaluator(
.. code-block:: python .. code-block:: python
eval = ctc_error_evaluator(input) eval = ctc_error_evaluator(input=input, label=lbl)
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
:param input: Input Layer. :param input: Input Layer. Should be the same as the input for ctc_layer.
:type input: LayerOutput :type input: LayerOutput
:param label: input label, which is a data_layer. Should be the same as the
label for ctc_layer.
:type label: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(name=name,
type="ctc_edit_distance", type="ctc_edit_distance",
input=input) input=input,
label=label)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
......
...@@ -21,7 +21,6 @@ from .evaluators import * ...@@ -21,7 +21,6 @@ from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType from .poolings import MaxPooling, AvgPooling, BasePoolingType
from .attrs import * from .attrs import *
from .default_decorators import * from .default_decorators import *
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
...@@ -47,11 +46,12 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", ...@@ -47,11 +46,12 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer', 'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer',
'tensor_layer', 'selective_fc_layer', 'sampling_id_layer', 'tensor_layer', 'selective_fc_layer', 'sampling_id_layer',
'slope_intercept_layer', 'trans_full_matrix_projection', 'slope_intercept_layer', 'trans_full_matrix_projection',
'linear_comb_layer',
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer', 'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
'cross_entropy_with_selfnorm', 'cross_entropy', 'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy', 'multi_binary_label_cross_entropy',
'rank_cost', 'lambda_cost', 'huber_cost', 'rank_cost', 'lambda_cost', 'huber_cost',
'block_expand_layer', 'block_expand_layer', 'out_prod_layer', 'print_layer'
] ]
...@@ -70,7 +70,8 @@ class LayerType(object): ...@@ -70,7 +70,8 @@ class LayerType(object):
POOLING_AVG = 'average' POOLING_AVG = 'average'
FC_LAYER = "fc" FC_LAYER = "fc"
COST = 'cost' COST = 'cost'
COSINE_SIM = 'cos_vm' COSINE_SIM_VEC = 'cos_vm'
COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid' HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv" CONV_LAYER = "conv"
POOL_LAYER = "pool" POOL_LAYER = "pool"
...@@ -91,6 +92,7 @@ class LayerType(object): ...@@ -91,6 +92,7 @@ class LayerType(object):
POWER_LAYER = 'power' POWER_LAYER = 'power'
SCALING_LAYER = 'scaling' SCALING_LAYER = 'scaling'
TRANS_LAYER = 'trans' TRANS_LAYER = 'trans'
OUT_PROD_LAYER = 'out_prod'
MEMORY = 'memory' MEMORY = 'memory'
MAXID_LAYER = 'maxid' MAXID_LAYER = 'maxid'
...@@ -102,9 +104,11 @@ class LayerType(object): ...@@ -102,9 +104,11 @@ class LayerType(object):
SEL_FC_LAYER = "selective_fc" SEL_FC_LAYER = "selective_fc"
SAMPLING_ID_LAYER = "sampling_id" SAMPLING_ID_LAYER = "sampling_id"
SLOPE_INTERCEPT_LAYER = "slope_intercept" SLOPE_INTERCEPT_LAYER = "slope_intercept"
CONVEX_COMBINATION_LAYER = "convex_comb" LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand" BLOCK_EXPAND = "blockexpand"
PRINT_LAYER = "print"
CTC_LAYER = "ctc" CTC_LAYER = "ctc"
CRF_LAYER = "crf" CRF_LAYER = "crf"
CRF_DECODING_LAYER = "crf_decoding" CRF_DECODING_LAYER = "crf_decoding"
...@@ -171,6 +175,8 @@ class LayerOutput(object): ...@@ -171,6 +175,8 @@ class LayerOutput(object):
assert LayerType.is_layer_type(layer_type) assert LayerType.is_layer_type(layer_type)
self.name = name self.name = name
self.layer_type = layer_type self.layer_type = layer_type
if parents is not None and type(parents) != list:
parents = [parents]
self.parents = [] if parents is None else parents self.parents = [] if parents is None else parents
self.activation = activation self.activation = activation
self.num_filters = num_filters self.num_filters = num_filters
...@@ -197,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold' ...@@ -197,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT = 'drop_rate' DROPOUT = 'drop_rate'
def check_input(input):
"""
Check that input is a LayerOutput, or a list/tuple of LayerOutput. If it is
a single LayerOutput, it is wrapped into a list.
:param input: The input layer. Could be a list/tuple of input layer.
:type input: LayerOutput|list|tuple
:return: list of LayerOutput
:rtype: list of LayerOutput
"""
if isinstance(input, LayerOutput):
return [input]
assert isinstance(input, (list, tuple))
for inp in input:
assert isinstance(inp, LayerOutput)
return list(input)
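A small illustration of the contract `check_input` is meant to enforce (the layer variables below are placeholders, not part of this change):

```python
# check_input() normalizes its argument to a list of LayerOutput, so a
# wrapper can accept either a single layer or a list/tuple of layers.
single = check_input(some_layer)            # -> [some_layer]
several = check_input([layer_a, layer_b])   # -> [layer_a, layer_b]
```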
def layer_support(*attrs): def layer_support(*attrs):
def decorator(method): def decorator(method):
@functools.wraps(method) @functools.wraps(method)
...@@ -512,7 +537,7 @@ class MixedLayerType(LayerOutput): ...@@ -512,7 +537,7 @@ class MixedLayerType(LayerOutput):
:rtype: MixedLayerType :rtype: MixedLayerType
""" """
if not self.finalized: if not self.finalized:
assert isinstance(other, Projection) assert isinstance(other, Projection) or isinstance(other, Operator)
self.inputs.append(other) self.inputs.append(other)
self.parents.append(other.origin) self.parents.append(other.origin)
return self return self
...@@ -725,6 +750,27 @@ def fc_layer(input, size, act=None, name=None, ...@@ -725,6 +750,27 @@ def fc_layer(input, size, act=None, name=None,
size=size) size=size)
@wrap_name_default("print")
def print_layer(input, name=None):
"""
Print the output value of input layers. This layer is useful for debugging.
:param name: The Layer Name.
:type name: basestring
:param input: The input layer. Could be a list/tuple of input layer.
:type input: LayerOutput|list|tuple
:return: No return
"""
input = check_input(input)
Layer(
name=name,
type=LayerType.PRINT_LAYER,
inputs=[l.name for l in input],
)
LayerOutput(name, LayerType.PRINT_LAYER, input)
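A minimal usage sketch in a trainer config (the `img`/`fc1` layers are illustrative, not part of this change):

```python
# Assuming the usual trainer_config_helpers imports are in scope.
img = data_layer(name="image", size=784)
fc1 = fc_layer(input=img, size=128, act=ReluActivation())

# Dump the forward output of fc1 to the training log for debugging.
# print_layer produces no LayerOutput, so nothing else consumes it.
print_layer(input=[fc1])
```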
@wrap_name_default("seq_pooling") @wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
...@@ -1169,13 +1215,16 @@ def power_layer(input, weight, name=None, layer_attr=None): ...@@ -1169,13 +1215,16 @@ def power_layer(input, weight, name=None, layer_attr=None):
@layer_support() @layer_support()
def scaling_layer(input, weight, name=None, layer_attr=None): def scaling_layer(input, weight, name=None, layer_attr=None):
""" """
A layer for each row of a matrix, multiplying with an element of a vector. A layer that multiplies each input vector by a weight scalar.
.. math:: .. math::
y.row[i] = w[i] * x.row[i] y = w x
where :math:`x` is size=dataDim input, :math:`w` is size=1 weight,
and :math:`y` is size=dataDim output.
where :math:`x` is (batchSize x dataDim) input, :math:`w` is Note that the above computation is for one sample. Multiple samples are
(batchSize x 1) weight vector, and :math:`y` is (batchSize x dataDim) output. processed in one batch.
The example usage is: The example usage is:
...@@ -1249,11 +1298,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): ...@@ -1249,11 +1298,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
.. math:: .. math::
similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b} similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b}
\\over \\|\\mathbf{b}\\| \\|\\mathbf{b}\\|} \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|}
And the input dimension is :math:`a \in R^M`, :math:`b \in R^{MN}`. The The size of a is M, and the size of b is M*N.
similarity will be calculated N times by step M. The output dimension is The similarity is computed N times, stepping through b with stride M. The output size is
:math:`R^N`. The scale will be multiplied to similarity. N. The similarity is multiplied by scale.
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
:param name: layer name :param name: layer name
:type name: basestring :type name: basestring
...@@ -1270,14 +1322,23 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): ...@@ -1270,14 +1322,23 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
Layer( if size == 1:
name=name, Layer(
type=LayerType.COSINE_SIM, name=name,
size=size, type=LayerType.COSINE_SIM,
cos_scale=scale, cos_scale=scale,
inputs=[a.name, b.name], inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr)
) )
else:
Layer(
name=name,
type=LayerType.COSINE_SIM_VEC,
size=size,
cos_scale=scale,
inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b]) return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b])
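The `size` argument now selects between the two underlying layer types; a hedged sketch (x and y are placeholder layers, and the sizes are examples only):

```python
# size == 1: both inputs have the same dimension; one cosine similarity
# per sample, implemented by the 'cos' layer.
a = fc_layer(input=x, size=64)
b = fc_layer(input=y, size=64)
sim = cos_sim(a=a, b=b, scale=5)

# size == N > 1: b holds N stacked vectors of the same dimension as a;
# N similarities per sample, implemented by the 'cos_vm' layer.
b_stack = fc_layer(input=y, size=64 * 4)
sim_vec = cos_sim(a=a, b=b_stack, size=4)
```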
@wrap_name_default() @wrap_name_default()
...@@ -2326,6 +2387,39 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -2326,6 +2387,39 @@ def maxid_layer(input, name=None, layer_attr=None):
layer_type=LayerType.MAXID_LAYER, layer_type=LayerType.MAXID_LAYER,
parents=[input]) parents=[input])
@wrap_name_default()
def out_prod_layer(input1, input2, name=None, layer_attr=None):
"""
A layer for computing the outer product of two vectors.
The result is a matrix of size(input1) x size(input2).
The example usage is:
.. code-block:: python
out_prod = out_prod_layer(input1=vec1, input2=vec2)
:param name: Layer name.
:type name: basestring
:param input1: The first input layer.
:type input1: LayerOutput
:param input2: The second input layer.
:type input2: LayerOutput
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input1, LayerOutput)
assert isinstance(input2, LayerOutput)
Layer(name=name,
type="out_prod",
inputs=[input1.name, input2.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name,
layer_type=LayerType.OUT_PROD_LAYER,
parents=[input1,input2])
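As a reference for the shape relation described above, a NumPy sketch of the per-sample computation (not Paddle code):

```python
import numpy as np

v1 = np.random.rand(3)      # size(input1) = 3
v2 = np.random.rand(5)      # size(input2) = 5

outer = np.outer(v1, v2)    # 3 x 5 matrix, i.e. size(input1) x size(input2)
assert outer.size == v1.size * v2.size   # flattened layer size: 15
```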
@wrap_name_default() @wrap_name_default()
def eos_layer(input, eos_id, name=None, layer_attr=None): def eos_layer(input, eos_id, name=None, layer_attr=None):
...@@ -2909,29 +3003,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0): ...@@ -2909,29 +3003,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0):
@wrap_name_default() @wrap_name_default()
def convex_comb_layer(input, size, name=None): def linear_comb_layer(weights, vectors, size, name=None):
""" """
A layer for convex weighted average of vectors takes two inputs. A layer for the weighted sum of vectors, taking two inputs.
- Input: a vector containing the convex weights (batchSize x weightdim), - Input: size of weights is M
and a matrix in a vector form (batchSize x (weightdim * datadim)). size of vectors is M*N
- Output: a vector (batchSize * datadim). - Output: a vector of size=N
.. math:: .. math::
y[i][j] = \sum_{j}(x_{1}(i, j) * x_{2}(i,j + i * dataDim)), z(i) = \sum_{j=0}^{M-1} x(j) y(i+Nj)
where :math:`0 \le i \le N-1`
Or in the matrix notation:
i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1) .. math::
z = x^\mathrm{T} Y
In this formula: In this formula:
- :math:`x_{1}`: the first input. - :math:`x`: weights
- :math:`x_{2}`: the second input. - :math:`y`: vectors.
- :math:`y`: the output. - :math:`z`: the output.
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The simple usage is: The simple usage is:
.. code-block:: python .. code-block:: python
convex_comb = convex_comb_layer(input=inputs, linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
size=elem_dim) size=elem_dim)
:param input: The input layers. :param input: The input layers.
...@@ -2944,15 +3046,16 @@ def convex_comb_layer(input, size, name=None): ...@@ -2944,15 +3046,16 @@ def convex_comb_layer(input, size, name=None):
:rtype: LayerOutput :rtype: LayerOutput
""" """
assert isinstance(input, list) or isinstance(input, tuple)
assert len(input) == 2
Layer( Layer(
name=name, name=name,
type=LayerType.CONVEX_COMBINATION_LAYER, type=LayerType.LINEAR_COMBINATION_LAYER,
size=size, size=size,
inputs=[Input(input[0].name), Input(input[1].name)], inputs=[Input(weights.name), Input(vectors.name)],
) )
return LayerOutput(name, LayerType.CONVEX_COMBINATION_LAYER, input, size=size) return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER,
[weights, vectors], size=size)
convex_comb_layer = linear_comb_layer
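A NumPy sketch of the per-sample formula z(i) = sum_j x(j) y(i + Nj), useful as a reference for the memory layout the layer expects (not Paddle code):

```python
import numpy as np

M, N = 4, 3
x = np.random.rand(M)        # weights, size M
y = np.random.rand(M * N)    # vectors, size M*N (M stacked vectors of length N)

Y = y.reshape(M, N)          # row j is the j-th stacked vector
z = np.dot(x, Y)             # z[i] = sum_j x[j] * y[i + N*j]
assert z.shape == (N,)
```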
@wrap_name_default() @wrap_name_default()
def block_expand_layer(input, def block_expand_layer(input,
...@@ -3036,6 +3139,17 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False): ...@@ -3036,6 +3139,17 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
classification task. That is, for sequence labeling problems where the classification task. That is, for sequence labeling problems where the
alignment between the inputs and the target labels is unknown. alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
Note:
Considering the 'blank' label needed by CTC, you need to use
(num_classes + 1) as the input size. num_classes is the category number.
And the 'blank' label is the last category index. So the size of the 'input' layer, such as an
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1.
The simple usage: The simple usage:
.. code-block:: python .. code-block:: python
...@@ -3049,7 +3163,7 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False): ...@@ -3049,7 +3163,7 @@ def ctc_layer(input, label, size, name=None, norm_by_times=False):
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The data layer of label with variable length.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers. :param size: category numbers + 1.
:type size: int :type size: int
:param name: The name of this layer, which does not need to be specified. :param name: The name of this layer, which does not need to be specified.
:type name: string|None :type name: string|None
......
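The sizing rule from the note above, as a short config sketch (`features` and `label_seq` are placeholder layers; the same pattern appears in the test config further down):

```python
# num_classes real labels plus one 'blank' label: both the softmax input
# and ctc_layer are sized num_classes + 1.
num_classes = 5
feat = fc_layer(input=features, size=num_classes + 1, act=SoftmaxActivation())
cost = ctc_layer(input=feat, label=label_seq, size=num_classes + 1)
```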
#################### test_config_parser #########################
add_test(NAME layers_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import parse_config_and_serialize
if __name__ == '__main__':
parse_config_and_serialize(
'trainer_config_helpers/tests/layers_test_config.py', '')
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
num_classes = 5
x = data_layer(name="input1", size=3)
y = data_layer(name="input2", size=5)
z = out_prod_layer(input1=x, input2=y)
x1 = fc_layer(input=x, size=5)
y1 = fc_layer(input=y, size=5)
y2 = fc_layer(input=y, size=15)
cos1 = cos_sim(a=x1, b=y1)
cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
out = fc_layer(input=[cos1, cos3, linear_comb, z],
size=num_classes,
act=SoftmaxActivation())
print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
# for ctc
tmp = fc_layer(input=x1,
size=num_classes + 1,
act=SoftmaxActivation())
ctc = ctc_layer(input=tmp,
label=y,
size=num_classes + 1)
ctc_eval = ctc_error_evaluator(input=tmp, label=y)
settings(
batch_size=10,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)