Commit 93006787 authored by emailweixu, committed by GitHub

Merge pull request #73 from reyoung/merge_icode

Merge Baidu Internal Changes.
......@@ -9,6 +9,7 @@ Install PaddlePaddle
:glob:
install_*
internal/install_from_jumbo.md
Build from Source
-----------------
......
......@@ -5,3 +5,4 @@ Cluster Train
:glob:
opensource/cluster_train.md
internal/index.md
......@@ -9,7 +9,11 @@ Note: The installation packages are still in pre-release state and your experience
.. toctree::
:maxdepth: 1
:glob:
Source download (internal) <../build/internal/download_paddle_source_zh_cn.rst>
Install with Jumbo (internal) <../build/internal/install_from_jumbo.rst>
Build and install from source (internal) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst
install/ubuntu_install.rst
cmake/index.rst
Cluster Training
================
* `Cluster Training <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
Cluster Training (internal) <internal/index.md>
......@@ -8,7 +8,7 @@ PaddlePaddle Documentation
* `User Interface <ui/index.html>`_
* `Usage Examples <demo/index.html>`_
* `Model Configuration <../doc/ui/api/trainer_config_helpers/index.html>`_
* `Cluster Training <../doc/cluster/index.html>`_
* `Cluster Training <cluster/index.html>`_
Development Guide
-----------------
......
......@@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label;
output.resizeAndCopyFrom(arguments[0], false);
label.resizeAndCopyFrom(arguments[1], false);
output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions);
CHECK(label.ids);
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "GradientMachine.h"
......@@ -206,7 +205,10 @@ public:
/**
* @brief Path default ctor, first logProb is 0.
*/
Path() { logProb = 0; seqId = 0; }
Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/**
......@@ -319,7 +321,9 @@ protected:
};
std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows.
// Each inFrameLine (inlink) has its own info (the elements below),
// and all outFrameLines (outlinks) share the info of one inFrameLine,
// which is specified by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
......@@ -327,13 +331,27 @@ protected:
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
Info info_;
std::vector<Info> info_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
// numSeqs_[i] is the number of sequences that are longer than i (for sequence
// data) or have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
void createInFrameInfo(const Argument& input, PassType passType);
// Each inlink has a std::vector<std::tuple<int, int, int, int>> that denotes
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
// the id of the inlink which shares info with the outlinks
int targetInfoInlinkId_;
/* Create scattered id information for all realLayers of inFrameLines at once.
* If hasSubseq, also create scattered sequenceStartPositions information
* for all realLayers of inFrameLines at once.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
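To make the tuple layout described in the comments above concrete, here is a small illustrative sketch (not part of this patch; buildSeqLengthAndStart and its argument are assumed names) of how the per-inlink (seqLength, seqStart, seqIndex, seqIndex) tuples for plain sequence data could be derived from a sequenceStartPositions array:

#include <tuple>
#include <vector>

// Illustrative only: derive (seqLength, seqStart, seqIndex, seqIndex) tuples
// for plain (non-subsequence) data from sequence start positions such as
// {0, 3, 5, 9}, which describe three sequences of lengths 3, 2 and 4.
std::vector<std::tuple<int, int, int, int>> buildSeqLengthAndStart(
    const std::vector<int>& seqStartPositions) {
  std::vector<std::tuple<int, int, int, int>> tuples;
  for (size_t i = 0; i + 1 < seqStartPositions.size(); ++i) {
    int start = seqStartPositions[i];
    int length = seqStartPositions[i + 1] - start;
    // For sequence data the last two fields are both the sequence index; for
    // subsequence data the fourth field would instead be the subsequence index.
    tuples.emplace_back(length, start, static_cast<int>(i),
                        static_cast<int>(i));
  }
  return tuples;
}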
......@@ -363,6 +381,9 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: the max number of sentences (subseqs) among the batchSize samples
// else: the max number of tokens among the batchSize samples (sentences)
int maxSequenceLength_;
bool useGpu_;
bool stopBeamSearch_;
......@@ -415,7 +436,7 @@ private:
* @param machineIdVec : selects a row of the output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink(std::vector<int> & machineIdVec);
void createDataOutlink(std::vector<int>& machineIdVec);
/*
* @brief used in beam search, connect previous frame to form recurrent link
......
......@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else {
forwardImp(getInput(0), getInput(1));
......@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else {
backwardImp(callback, getInput(0), getInput(1));
}
......
......@@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_);
outputH_ * outputW_ * numFilters_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
......@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}
......
......@@ -52,8 +52,10 @@ public:
Layer::forward(passType);
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]);
} else {
forwardImp(getInput(0));
......
......@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST);
Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) {
size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
integer_value(2)])
def process_subseq(settings, file_name):
for d in data:
yield d
@provider(input_types=[integer_value_sequence(10),
integer_value(2)])
def process_seq(settings, file_name):
for d in data:
seq = []
for subseq in d[0]:
seq += subseq
yield seq, d[1]
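# Note: process_seq flattens the nested word lists in `data`, so it yields the
# same tokens as process_subseq but without the subsequence boundaries. The
# sequence_rnn.conf and sequence_nest_rnn.conf configurations below read from
# these two providers and are expected to reach the same training cost.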
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(y):
inner_mem = memory(name="inner_rnn_state",
size=hidden_dim,
boot_layer=outer_mem)
return fc_layer(input=[y, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="inner_rnn_state")
inner_rnn_output = recurrent_group(
step=inner_step,
input=x)
last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it correctly. Current implementation requires that
# all the out links are from sequences. However, it does not report error
# when the out links are not sequences.
return inner_rnn_output
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
mem = memory(name="rnn_state", size=hidden_dim)
return fc_layer(input=[y, mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name="rnn_state")
out = recurrent_group(
step=step,
input=emb)
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
......@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT
using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer {
......@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim);
// vecW needs to be initialized; otherwise its value is undefined.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain();
for (int i = 0; i < num_passes; ++i) {
......@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str());
}
TEST(RecurrentGradientMachine, HasSubSequence) {
void test(const string& conf1, const string& conf2) {
int num_passes = 5;
real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes);
......@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete[] cost2;
}
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf");
}
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf");
}
int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) {
......
......@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
Argument& cpuInput = testCpu.dataLayer_->getOutput();
Argument& gpuInput = testGpu.dataLayer_->getOutput();
gpuInput.resizeAndCopyFrom(cpuInput, true);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
......@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
if (!matrix) {
matrix = Matrix::create(height, width, trans, useGpu);
} else {
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width);
}
}
......@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
} else {
CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
dynamic_cast<GpuSparseMatrix*>(matrix.get()));
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width, nnz, valueType, format);
}
}
......
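A minimal sketch (not part of the patch; the buffer variable is an assumption) of the misuse that the new CHECK_EQ in Matrix::resizeOrCreate guards against:

// Reusing a buffer with a mismatched device flag: previously the CPU matrix
// would simply be resized and stay on the CPU even though the caller asked
// for a GPU matrix; with the added CHECK_EQ the mismatch fails loudly.
MatrixPtr buffer;  // hypothetical reusable buffer
Matrix::resizeOrCreate(buffer, 32, 64, /* trans= */ false, /* useGpu= */ false);
// ... later, the same buffer is requested on the GPU:
Matrix::resizeOrCreate(buffer, 32, 64, /* trans= */ false, /* useGpu= */ true);
// Now CHECK_EQ(matrix->useGpu(), useGpu) aborts instead of silently returning
// a matrix on the wrong device.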
......@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
} else if ((!useGpu) && (!cpuVectorT_)) {
cpuVectorT_ = VectorT<T>::create(size, false);
} else {
CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
this->resize(size, useGpu);
}
}
......
......@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
if (!dest) {
dest = src->clone(0, 0, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(src->getHeight(), src->getWidth());
}
dest->copyFrom(*src, stream);
......@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) {
CHECK_LE((size_t)startRow + copySize, src->getHeight());
int height = copySize;
int width = src->getWidth();
if (!dest) {
dest = src->clone(height, width, useGpu);
} else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(height, width);
}
MatrixPtr submat = src->subMatrix(startRow, copySize);
......@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
}
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
hl_stream_t stream) {
dataId = src.dataId;
......@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
resizeAndCopy(strs, src.strs, useGpu, stream);
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu) {
int32_t size = resizeAndCopyFrom(src, startSeq, copySize, useGpu,
HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
return size;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu,
hl_stream_t stream) {
......
......@@ -203,13 +203,28 @@ struct Argument {
* startSeq: the sample id of start
* copySize: how many samples need to copy
* return value: how many samples are copied
* Note that when the stream is specified explicitly, as in this overload,
* the caller must also synchronize the stream at some point after this function returns
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
int32_t copySize, bool useGpu, hl_stream_t stream);
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT);
/*
* Same as the function above, except that the stream is
* HPPL_STREAM_DEFAULT and synchronization is performed automatically
* inside the call
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu);
void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
/*
* Same as the function above, except that the stream is
* HPPL_STREAM_DEFAULT and synchronization is performed automatically
* inside the call
*/
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
/*
@brief Concatenate several arguments into one and put the result into it.
......@@ -240,6 +255,15 @@ struct Argument {
/*
Get the sequence lengths, start positions, and the max length from the input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLength is the maximal number of subsequences in each sequence.
*/
void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
......
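The comments above describe the split between the stream-taking overloads and the synchronous convenience overloads of Argument::resizeAndCopyFrom. A short usage sketch, with hypothetical variables (inputs, cpuInputs, someInput), assuming a caller similar to the CTCLayer code earlier in this diff:

// Asynchronous pattern: issue several copies on one stream, then synchronize
// once, as CTCLayer::forward does above.
for (size_t i = 0; i < inputs.size(); ++i) {
  cpuInputs[i].resizeAndCopyFrom(inputs[i], /* useGpu= */ false,
                                 HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);  // caller-side synchronization

// Synchronous pattern: the overload without a stream argument uses
// HPPL_STREAM_DEFAULT internally and synchronizes before returning, as in the
// evaluator and test code above.
Argument cpuCopy;
cpuCopy.resizeAndCopyFrom(someInput, /* useGpu= */ false);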
......@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11;
// the id of the inlink which shares info with the outlinks; used in recurrent layer group
optional int32 target_inlinkid = 12;
}
message ModelConfig {
......
......@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed=False):
seq_reversed=False,
target_inlinkname=""):
global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn")
......@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for link in in_links:
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
......@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
......@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links,
out_links,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links,
seq_reversed)
seq_reversed,
target_inlinkname)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......