提交 93006787 编写于 作者: E emailweixu 提交者: GitHub

Merge pull request #73 from reyoung/merge_icode

Merge Baidu Internal Changes.
...@@ -9,6 +9,7 @@ Install PaddlePaddle ...@@ -9,6 +9,7 @@ Install PaddlePaddle
:glob: :glob:
install_* install_*
internal/install_from_jumbo.md
Build from Source Build from Source
----------------- -----------------
......
...@@ -5,3 +5,4 @@ Cluster Train ...@@ -5,3 +5,4 @@ Cluster Train
:glob: :glob:
opensource/cluster_train.md opensource/cluster_train.md
internal/index.md
...@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc ...@@ -9,7 +9,11 @@ Note: The intallation packages are still in pre-release state and your experienc
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
:glob:
源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst install/docker_install.rst
install/ubuntu_install.rst install/ubuntu_install.rst
cmake/index.rst cmake/index.rst
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
...@@ -8,7 +8,7 @@ PaddlePaddle文档 ...@@ -8,7 +8,7 @@ PaddlePaddle文档
* `用户接口 <ui/index.html>`_ * `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_ * `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_ * `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <../doc/cluster/index.html>`_ * `集群训练 <cluster/index.html>`_
开发指南 开发指南
-------- --------
......
...@@ -194,8 +194,8 @@ public: ...@@ -194,8 +194,8 @@ public:
virtual real evalImp(std::vector<Argument>& arguments) { virtual real evalImp(std::vector<Argument>& arguments) {
CHECK_EQ(arguments.size(), (size_t)2); CHECK_EQ(arguments.size(), (size_t)2);
Argument output, label; Argument output, label;
output.resizeAndCopyFrom(arguments[0], false); output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT);
label.resizeAndCopyFrom(arguments[1], false); label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT); hl_stream_synchronize(HPPL_STREAM_DEFAULT);
CHECK(label.sequenceStartPositions); CHECK(label.sequenceStartPositions);
CHECK(label.ids); CHECK(label.ids);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "GradientMachine.h" #include "GradientMachine.h"
...@@ -206,7 +205,10 @@ public: ...@@ -206,7 +205,10 @@ public:
/** /**
* @brief Path default ctor, first logProb is 0. * @brief Path default ctor, first logProb is 0.
*/ */
Path() { logProb = 0; seqId = 0; } Path() {
logProb = 0;
seqId = 0;
}
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; } explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
/** /**
...@@ -319,7 +321,9 @@ protected: ...@@ -319,7 +321,9 @@ protected:
}; };
std::vector<MemoryFrameLine> memoryFrameLines_; std::vector<MemoryFrameLine> memoryFrameLines_;
// All inFrameLines and outFrameLines have the same element as follows. // Each inFrameLines(inlinks) has its own info(elements) below,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info { struct Info {
IVectorPtr allIds; // scattered id of realLayer IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds std::vector<int> idIndex; // index of allIds
...@@ -327,13 +331,27 @@ protected: ...@@ -327,13 +331,27 @@ protected:
sequenceStartPositions; // scattered sequenceStartPositions sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
}; };
Info info_; std::vector<Info> info_;
// if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex) // numSeqs_[i] is the number sequences which is longer than i (for sequence
// else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex) // data) or has more than i subsequences (for subsequence data)
std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_; std::vector<int> numSeqs_;
void createInFrameInfo(const Argument& input, PassType passType); // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
// the id of the inlink which shares its info with the outlinks
int targetInfoInlinkId_;
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
*/
void createInFrameInfo(int inlinks_id, const Argument& input,
PassType passType);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType); PassType passType);
...@@ -363,6 +381,9 @@ protected: ...@@ -363,6 +381,9 @@ protected:
NeuralNetwork* rootNetwork_; NeuralNetwork* rootNetwork_;
bool reversed_; bool reversed_;
// if hasSubseq: max number of sentences (subseq) in batchsize samples
// else: max number of tokens in batchsize samples (sentences)
int maxSequenceLength_; int maxSequenceLength_;
bool useGpu_; bool useGpu_;
bool stopBeamSearch_; bool stopBeamSearch_;
...@@ -415,7 +436,7 @@ private: ...@@ -415,7 +436,7 @@ private:
* @param machineIdVec : select a row of output matrix in each frame * @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded. * that the generation process expanded.
*/ */
void createDataOutlink(std::vector<int> & machineIdVec); void createDataOutlink(std::vector<int>& machineIdVec);
/* /*
* @brief used in beam search, connect previous frame to form recurrent link * @brief used in beam search, connect previous frame to form recurrent link
......
...@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) { ...@@ -49,8 +49,10 @@ void CTCLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]); forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
} else { } else {
forwardImp(getInput(0), getInput(1)); forwardImp(getInput(0), getInput(1));
...@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) { ...@@ -92,9 +94,9 @@ void CTCLayer::backward(const UpdateCallback &callback) {
if (useGpu_) { if (useGpu_) {
backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]); backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
const_cast<Argument&>(getInput(0)). const_cast<Argument&>(getInput(0)).
resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1); resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT);
const_cast<Argument&>(getInput(1)). const_cast<Argument&>(getInput(1)).
resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1); resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT);
} else { } else {
backwardImp(callback, getInput(0), getInput(1)); backwardImp(callback, getInput(0), getInput(1));
} }
......
...@@ -248,7 +248,7 @@ void ConvOperator::forward() { ...@@ -248,7 +248,7 @@ void ConvOperator::forward() {
CHECK_EQ(ins_[1]->value->getHeight(), batchSize); CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value); checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(out_->value, batchSize, Matrix::resizeOrCreate(out_->value, batchSize,
outputH_ * outputW_ * numFilters_); outputH_ * outputW_ * numFilters_, false, useGpu_);
{ {
AsyncGpuBlock block; AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) { for (size_t batchId = 0; batchId < batchSize; ++batchId) {
......
...@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label, ...@@ -509,8 +509,10 @@ void HuberTwoClass::forwardImp(Matrix &output, Argument &label,
Matrix &cost) { Matrix &cost) {
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
} }
forwardImpIn(output, label, cost); forwardImpIn(output, label, cost);
} }
......
...@@ -52,8 +52,10 @@ public: ...@@ -52,8 +52,10 @@ public:
Layer::forward(passType); Layer::forward(passType);
if (useGpu_) { if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1); tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
} }
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
forwardImp(tmpCpuInput_[0]); forwardImp(tmpCpuInput_[0]);
} else { } else {
forwardImp(getInput(0)); forwardImp(getInput(0));
......
...@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers, ...@@ -92,7 +92,6 @@ void testState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST); testLayer->forward(PASS_TEST);
Argument out; Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) { if (batchOut.value) {
size_t dim = batchOut.value->getWidth(); size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value); ASSERT_TRUE((bool)out.value);
...@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers, ...@@ -220,7 +219,6 @@ void testBatchState(LayerPtr testLayer, vector<DataLayerPtr>& dataLayers,
testLayer->forward(PASS_TEST); testLayer->forward(PASS_TEST);
Argument out; Argument out;
out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
if (batchOut.value) { if (batchOut.value) {
size_t dim = batchOut.value->getWidth(); size_t dim = batchOut.value->getWidth();
ASSERT_TRUE((bool)out.value); ASSERT_TRUE((bool)out.value);
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
@provider(input_types=[integer_value_sub_sequence(10),
                       integer_value(2)])
def process_subseq(settings, file_name):
    """Yield every sample of the module-level ``data`` list unchanged.

    Each sample is a ``[sub_sequences, label]`` pair in which
    ``sub_sequences`` is a list of integer lists and ``label`` is an integer.
    Neither ``settings`` nor ``file_name`` is read by this function.
    """
    for sample in data:
        yield sample
@provider(input_types=[integer_value_sequence(10),
                       integer_value(2)])
def process_seq(settings, file_name):
    """Yield each sample of ``data`` with its sub-sequences joined together.

    For every ``[sub_sequences, label]`` pair in the module-level ``data``
    list, the sub-sequences are concatenated in order into a single flat
    list, and ``(flat_list, label)`` is yielded. Neither ``settings`` nor
    ``file_name`` is read by this function.
    """
    for sample in data:
        # Concatenate the sub-sequences in their original order.
        flattened = [token for subseq in sample[0] for token in subseq]
        yield flattened, sample[1]
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
    """Step function of the outer recurrent group.

    Declares a memory named "outer_rnn_state", runs an inner recurrent group
    over ``x`` whose own memory is booted from that outer memory, and returns
    the output of the inner recurrent group.
    """
    outer_state = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(y):
        # The inner memory refers to the fc layer created below (same name)
        # and is booted from the outer memory.
        inner_state = memory(name="inner_rnn_state",
                             size=hidden_dim,
                             boot_layer=outer_state)
        step_inputs = [y, inner_state]
        return fc_layer(input=step_inputs,
                        size=hidden_dim,
                        act=TanhActivation(),
                        bias_attr=True,
                        name="inner_rnn_state")

    inner_rnn_output = recurrent_group(step=inner_step, input=x)

    # This layer carries the name that the outer memory above refers to, so
    # the call must stay even though ``last`` is not returned.
    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")

    # "return last" should also work. But currently RecurrentGradientMachine
    # does not handle it correctly. Current implementation requires that
    # all the out links are from sequences. However, it does not report error
    # when the out links are not sequences.
    return inner_rnn_output
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
    """Step function of the recurrent group.

    Declares a memory named "rnn_state" and returns a tanh-activated fc layer
    of size ``hidden_dim`` over ``y`` and that memory. The fc layer is given
    the same name, "rnn_state", as the memory.
    """
    prev_state = memory(name="rnn_state", size=hidden_dim)
    step_inputs = [y, prev_state]
    return fc_layer(input=step_inputs,
                    size=hidden_dim,
                    act=TanhActivation(),
                    bias_attr=True,
                    name="rnn_state")
out = recurrent_group(
step=step,
input=emb)
value_printer_evaluator(input=out)
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
...@@ -21,6 +21,8 @@ limitations under the License. */ ...@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h> #include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h> #include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
using namespace std; // NOLINT using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer { class TrainerForTest : public paddle::Trainer {
...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim); CpuVector vecMomentum(dim);
// vecW needs to be assigned, otherwise the variable is an uncertain value. // vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain(); trainer.startTrain();
for (int i = 0; i < num_passes; ++i) { for (int i = 0; i < num_passes; ++i) {
...@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str()); rmDir(dir.c_str());
} }
TEST(RecurrentGradientMachine, HasSubSequence) { void test(const string& conf1, const string& conf2) {
int num_passes = 5; int num_passes = 5;
real* cost1 = new real[num_passes]; real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1"; const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes); CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes]; real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2"; const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes); CalCost(conf2, dir2, cost2, num_passes);
...@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) { ...@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete[] cost2; delete[] cost2;
} }
// Passes the sequence_layer_group and sequence_nest_layer_group configs to
// test().
// NOTE(review): test() presumably checks that both configs produce the same
// per-pass costs; the comparison is not visible in this excerpt -- confirm.
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf");
}
// Passes the sequence_rnn and sequence_nest_rnn configs to test().
// NOTE(review): test() presumably checks that both configs produce the same
// per-pass costs; the comparison is not visible in this excerpt -- confirm.
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf");
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) { if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) { if (!paddle::version::isWithGpu()) {
......
...@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize, ...@@ -299,7 +299,6 @@ void checkRecurrentLayer(LayerConfig layerConfig, size_t batchSize,
Argument& cpuInput = testCpu.dataLayer_->getOutput(); Argument& cpuInput = testCpu.dataLayer_->getOutput();
Argument& gpuInput = testGpu.dataLayer_->getOutput(); Argument& gpuInput = testGpu.dataLayer_->getOutput();
gpuInput.resizeAndCopyFrom(cpuInput, true); gpuInput.resizeAndCopyFrom(cpuInput, true);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE); const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE);
const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE); const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE);
......
...@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width, ...@@ -146,6 +146,7 @@ void Matrix::resizeOrCreate(MatrixPtr& matrix, size_t height, size_t width,
if (!matrix) { if (!matrix) {
matrix = Matrix::create(height, width, trans, useGpu); matrix = Matrix::create(height, width, trans, useGpu);
} else { } else {
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width); matrix->resize(height, width);
} }
} }
...@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height, ...@@ -161,6 +162,7 @@ void Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, size_t height,
} else { } else {
CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) || CHECK(dynamic_cast<CpuSparseMatrix*>(matrix.get()) ||
dynamic_cast<GpuSparseMatrix*>(matrix.get())); dynamic_cast<GpuSparseMatrix*>(matrix.get()));
CHECK_EQ(matrix->useGpu(), useGpu);
matrix->resize(height, width, nnz, valueType, format); matrix->resize(height, width, nnz, valueType, format);
} }
} }
......
...@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) { ...@@ -800,6 +800,7 @@ void CpuGpuVectorT<T>::resizeOrCreate(size_t size, bool useGpu) {
} else if ((!useGpu) && (!cpuVectorT_)) { } else if ((!useGpu) && (!cpuVectorT_)) {
cpuVectorT_ = VectorT<T>::create(size, false); cpuVectorT_ = VectorT<T>::create(size, false);
} else { } else {
CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_));
this->resize(size, useGpu); this->resize(size, useGpu);
} }
} }
......
...@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu, ...@@ -25,6 +25,7 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, bool useGpu,
if (!dest) { if (!dest) {
dest = src->clone(0, 0, useGpu); dest = src->clone(0, 0, useGpu);
} else { } else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(src->getHeight(), src->getWidth()); dest->resize(src->getHeight(), src->getWidth());
} }
dest->copyFrom(*src, stream); dest->copyFrom(*src, stream);
...@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src, ...@@ -60,12 +61,12 @@ static void resizeAndCopy(MatrixPtr& dest, const MatrixPtr& src,
hl_stream_t stream = HPPL_STREAM_DEFAULT) { hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) { if (src) {
CHECK_LE((size_t)startRow + copySize, src->getHeight()); CHECK_LE((size_t)startRow + copySize, src->getHeight());
int height = copySize; int height = copySize;
int width = src->getWidth(); int width = src->getWidth();
if (!dest) { if (!dest) {
dest = src->clone(height, width, useGpu); dest = src->clone(height, width, useGpu);
} else { } else {
CHECK_EQ(dest->useGpu(), useGpu);
dest->resize(height, width); dest->resize(height, width);
} }
MatrixPtr submat = src->subMatrix(startRow, copySize); MatrixPtr submat = src->subMatrix(startRow, copySize);
...@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src, ...@@ -182,6 +183,11 @@ static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src,
} }
} }
// Convenience overload: forwards to the stream-taking overload with
// HPPL_STREAM_DEFAULT and then synchronizes that stream before returning.
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) {
resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT);
// Synchronize here so callers of this overload do not have to do it.
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu, void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
hl_stream_t stream) { hl_stream_t stream) {
dataId = src.dataId; dataId = src.dataId;
...@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu, ...@@ -199,6 +205,14 @@ void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu,
resizeAndCopy(strs, src.strs, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream);
} }
// Convenience overload of the ranged copy: forwards to the stream-taking
// overload with HPPL_STREAM_DEFAULT, synchronizes that stream, and returns
// the value produced by the stream-taking overload.
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
                                    int32_t copySize, bool useGpu) {
  const int32_t numCopied =
      resizeAndCopyFrom(src, startSeq, copySize, useGpu, HPPL_STREAM_DEFAULT);
  // Synchronize here so callers of this overload do not have to do it.
  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
  return numCopied;
}
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq, int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu, int32_t copySize, bool useGpu,
hl_stream_t stream) { hl_stream_t stream) {
......
...@@ -203,13 +203,28 @@ struct Argument { ...@@ -203,13 +203,28 @@ struct Argument {
* startSeq: the sample id of start * startSeq: the sample id of start
* copySize: how many samples need to copy * copySize: how many samples need to copy
* return value: how many samples are copied * return value: how many samples are copied
* Note that when specifying the stream explicitly in this case,
* synchronize should also be called somewhere after this function
*/ */
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq, int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu, int32_t copySize, bool useGpu, hl_stream_t stream);
hl_stream_t stream = HPPL_STREAM_DEFAULT);
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu, /*
hl_stream_t stream = HPPL_STREAM_DEFAULT); * same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
int32_t resizeAndCopyFrom(const Argument& src, int32_t startSeq,
int32_t copySize, bool useGpu = FLAGS_use_gpu);
void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream);
/*
* same with the above function, except that the stream is
* HPPL_STREAM_DEFAULT and synchronize is automatically called
* inside it
*/
void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu);
/* /*
@brief Concatenate several arguments into one and put the result into it. @brief Concatenate several arguments into one and put the result into it.
...@@ -240,6 +255,15 @@ struct Argument { ...@@ -240,6 +255,15 @@ struct Argument {
/* /*
Get Sequence Length, startPositions and max Length according to input Get Sequence Length, startPositions and max Length according to input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLength is the maximal number of subsequences in each sequence.
*/ */
void getSeqLengthAndStart( void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart, std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
......
...@@ -452,6 +452,9 @@ message SubModelConfig { ...@@ -452,6 +452,9 @@ message SubModelConfig {
repeated LinkConfig out_links = 10; repeated LinkConfig out_links = 10;
optional GeneratorConfig generator = 11; optional GeneratorConfig generator = 11;
// the id of the inlink which shares its info with the outlinks, used in recurrent layer group
optional int32 target_inlinkid = 12;
} }
message ModelConfig { message ModelConfig {
......
...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None): ...@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
@config_func @config_func
def RecurrentLayerGroupWithoutOutLinksBegin(name, def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed=False): seq_reversed=False,
target_inlinkname=""):
global g_current_submodel global g_current_submodel
config_assert(g_config.model_config.type == "recurrent_nn", config_assert(g_config.model_config.type == "recurrent_nn",
"RecurrentLayerGroup should be used only in recurrent_nn") "RecurrentLayerGroup should be used only in recurrent_nn")
...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name) SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0 in_links_count = 0
for link in in_links: for linkid, link in enumerate(in_links):
if isinstance(link, basestring): if isinstance(link, basestring):
name = link name = link
has_subseq = False has_subseq = False
else: else:
name = link.link_name name = link.link_name
has_subseq = link.has_subseq has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0: if in_links_count == 0:
in_links_has_subseq = has_subseq in_links_has_subseq = has_subseq
else: else:
...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SequenceScatterAgentLayer(name=name, size=layer.size) SequenceScatterAgentLayer(name=name, size=layer.size)
else: else:
ScatterAgentLayer(name=name, size=layer.size) ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add() pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name) pair.link_name = MakeLayerNameInSubmodel(name)
...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name, ...@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
in_links, in_links,
out_links, out_links,
generator=None, generator=None,
target_inlinkname="",
seq_reversed=False): seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links, in_links,
seq_reversed) seq_reversed,
target_inlinkname)
for link in out_links: for link in out_links:
RecurrentLayerGroupSetOutLink(link) RecurrentLayerGroupSetOutLink(link)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册