diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc
index 445279fa01034cc0805c3dbd2e3cb1b269607661..dc109487ded20f91c3081ebde8bb50834c362bcf 100644
--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@@ -217,7 +217,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
   } else {
     LOG(FATAL) << "parameter transa error!";
   }
 
-  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
+  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
   CHECK_SYNC("hl_matrix_mul failed");
 }
@@ -266,7 +266,7 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
     LOG(FATAL) << "parameter transa error!";
   }
 
-  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
+  CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
   CHECK_SYNC("hl_matrix_mul_vector");
 }
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
index bee82faa5fca8bb82848b862a239258a8165ce7b..fc38bca3c403b2855ad873e5cc06539d10a941cf 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -497,20 +497,21 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     int idSize = 0;
     // connect in_links
     for (size_t j = 0; j < inFrameLines_.size(); ++j) {
+      Info& info = info_[shareInlinkInfo ? 0 : j];
       // idSize denotes the sum number of tokens in each length i
-      idSize = info_[j].idIndex[i + 1] - info_[j].idIndex[i];
+      idSize = info.idIndex[i + 1] - info.idIndex[i];
       InFrameLine inFrameLine = inFrameLines_[j];
       auto scatterAgent =
           dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
       scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
-                                          inFrameLine.outArg, info_[j].allIds,
-                                          info_[j].idIndex[i], idSize);
+                                          inFrameLine.outArg, info.allIds,
+                                          info.idIndex[i], idSize);
       if (hasSubseq) {
         // size: the length of subsequence
         int size =
-            info_[j].seqStartPosIndex[i + 1] - info_[j].seqStartPosIndex[i];
-        scatterAgent->setSequenceStartPositions(info_[j].sequenceStartPositions,
-                                                info_[j].seqStartPosIndex[i],
+            info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i];
+        scatterAgent->setSequenceStartPositions(info.sequenceStartPositions,
+                                                info.seqStartPosIndex[i],
                                                 size);
       }
     }
@@ -744,16 +745,24 @@ void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer,
                                                  const IVectorPtr& allIds,
                                                  Argument* arg,
                                                  PassType passType) {
-  const MatrixPtr& realV = layer->getOutputValue();
-  int height = realV->getHeight();
-  int width = realV->getWidth();
-  Matrix::resizeOrCreate(arg->value, height, width, /* trans */ false, useGpu_);
-  arg->value->zeroMem();
-  arg->value->selectRows(*realV, *allIds);
-  if (passType != PASS_TEST) {
-    Matrix::resizeOrCreate(arg->grad, height, width, /* trans */ false,
-                           useGpu_);
-    arg->grad->zeroMem();
+  Argument& src = layer->getOutput();
+  if (src.value) {
+    const MatrixPtr& realV = src.value;
+    int height = realV->getHeight();
+    int width = realV->getWidth();
+    Matrix::resizeOrCreate(
+        arg->value, height, width, /* trans */ false, useGpu_);
+    arg->value->zeroMem();
+    arg->value->selectRows(*realV, *allIds);
+    if (passType != PASS_TEST) {
+      Matrix::resizeOrCreate(arg->grad, height, width, /* trans */ false,
+                             useGpu_);
+      arg->grad->zeroMem();
+    }
+  }
+  if (src.ids) {
+    IVector::resizeOrCreate(arg->ids, src.ids->getSize(), useGpu_);
+    arg->ids->selectFrom(*src.ids, *allIds);
   }
 }
diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp
index c1bef18ed38af8393b044f184364dfbd7e9e6bbb..056e9568852ac93552413334be1960e9c17525d4 100644
--- a/paddle/gserver/layers/AgentLayer.cpp
+++ b/paddle/gserver/layers/AgentLayer.cpp
@@ -139,15 +139,16 @@ void ScatterAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
-  if (realLayer_->getOutput().ids) {  // ids scatter
-    IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
-    output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
-  } else {  // value scatter
-    int width = this->getSize();
-    if (realOutArg_.value) {
-      output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_,
-                         width, useGpu_);
-    } else {  // used in generation
+  int width = this->getSize();
+  if (realOutArg_.value || realOutArg_.ids) {
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
+                       width, useGpu_);
+  } else {  // used in generation
+    if (realLayer_->getOutput().ids) {
+      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
+      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
+    }
+    if (realLayer_->getOutput().value) {
       int height = ids_->getSize();
       resetOutput(height, width);
@@ -213,18 +214,17 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
 void SequenceScatterAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
-  CHECK(!realLayer_->getOutput().ids) << "Not supported";
 
   const Argument& input = realLayer_->getOutput();
-  CHECK_EQ(input.value->getWidth(), this->getSize());
+  CHECK_EQ(realLayer_->getSize(), this->getSize());
   int width = this->getSize();
   AsyncGpuBlock asyncGpuBlock;
   REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
 
-  if (realOutArg_.value) {
+  if (realOutArg_.value || realOutArg_.ids) {
     CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_ * width, idSize_,
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
                        width, useGpu_, /* trans */ false, /* seqFlag */ true,
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 129f10fac114d41f7c016e1fc22f311ee78cbfa5..ff2abf76973174ac2a437830b234f4c9937c08ed 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -56,7 +56,6 @@ add_test(NAME test_RecurrentGradientMachine
         COMMAND .set_python_path.sh -d
                 ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests
                 ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
-                --use_gpu=false
         WORKING_DIRECTORY ${PROJ_ROOT}/paddle)
 
 add_unittest_without_exec(test_NetworkCompare
diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf b/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
new file mode 100644
index 0000000000000000000000000000000000000000..e01b3f8e7aa5c4c14c64c2843b0f6f82817972a1
--- /dev/null
+++ b/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
@@ -0,0 +1,77 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_subseq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 3
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(input=data, size=word_dim)
+
+# This hierarchical RNN is designed to be equivalent to the simple RNN in
+# sequence_rnn_multi_input.conf
+
+def outer_step(wid, x):
+    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
+    def inner_step(y, wid):
+        z = embedding_layer(input=wid, size=word_dim)
+        inner_mem = memory(name="inner_rnn_state",
+                           size=hidden_dim,
+                           boot_layer=outer_mem)
+        out = fc_layer(input=[y, z, inner_mem],
+                       size=hidden_dim,
+                       act=TanhActivation(),
+                       bias_attr=True,
+                       name="inner_rnn_state")
+        return out
+
+    inner_rnn_output = recurrent_group(
+        step=inner_step,
+        name="inner",
+        input=[x, wid])
+    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
+
+    # "return last" should also work. But currently RecurrentGradientMachine
+    # does not handle it correctly. The current implementation requires that
+    # all the out links are from sequences. However, it does not report an
+    # error when the out links are not sequences.
+    return inner_rnn_output
+
+out = recurrent_group(
+    name="outer",
+    step=outer_step,
+    input=[SubsequenceInput(data), SubsequenceInput(emb)])
+
+rep = last_seq(input=out)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
diff --git a/paddle/gserver/tests/sequence_rnn_multi_input.conf b/paddle/gserver/tests/sequence_rnn_multi_input.conf
new file mode 100644
index 0000000000000000000000000000000000000000..968621cab59be9296ae5ee962a3a359fff59e022
--- /dev/null
+++ b/paddle/gserver/tests/sequence_rnn_multi_input.conf
@@ -0,0 +1,58 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_seq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 3
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(input=data, size=word_dim)
+
+def step(y, wid):
+    z = embedding_layer(input=wid, size=word_dim)
+    mem = memory(name="rnn_state", size=hidden_dim)
+    out = fc_layer(input=[y, z, mem],
+                   size=hidden_dim,
+                   act=TanhActivation(),
+                   bias_attr=True,
+                   name="rnn_state")
+    return out
+
+out = recurrent_group(
+    name="rnn",
+    step=step,
+    input=[emb, data])
+
+rep = last_seq(input=out)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
index b73fdd18abf35858a366552120e69c8a039a4726..550df0a31844ece80aa3f2d976f46a84cef9b35f 100644
--- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -92,7 +92,11 @@ void CalCost(const string& conf, const string& dir, real* cost,
   rmDir(dir.c_str());
 }
 
-void test(const string& conf1, const string& conf2, double eps) {
+void test(const string& conf1, const string& conf2, double eps, bool useGpu) {
+  if (!paddle::version::isWithGpu() && useGpu) {
+    return;
+  }
+  FLAGS_use_gpu = useGpu;
   int num_passes = 5;
   real* cost1 = new real[num_passes];
   const string dir1 = "gserver/tests/t1";
@@ -113,17 +117,28 @@ void test(const string& conf1, const string& conf2, double eps) {
 }
 
 TEST(RecurrentGradientMachine, HasSubSequence) {
-  test("gserver/tests/sequence_layer_group.conf",
-       "gserver/tests/sequence_nest_layer_group.conf",
-       1e-5);
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_layer_group.conf",
+         "gserver/tests/sequence_nest_layer_group.conf",
+         1e-5, useGpu);
+  }
 }
 
 TEST(RecurrentGradientMachine, rnn) {
-  test("gserver/tests/sequence_rnn.conf",
-       "gserver/tests/sequence_nest_rnn.conf",
-       0);
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn.conf",
+         "gserver/tests/sequence_nest_rnn.conf",
+         1e-6, useGpu);
+  }
 }
+TEST(RecurrentGradientMachine, rnn_multi_input) {
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_multi_input.conf",
+         "gserver/tests/sequence_nest_rnn_multi_input.conf",
+         1e-6, useGpu);
+  }
+}
 
 int main(int argc, char** argv) {
   if (paddle::version::isWithPyDataProvider()) {
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 0ca56b29b39b317d01d80631e332ba02356a613d..42c74661d2b2cebe0c2f5f14d0970ab2f1fec866 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -554,11 +554,16 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) {
 void Argument::subArgFrom(const Argument& input, size_t offset, size_t height,
                           size_t width, bool useGpu, bool trans, bool seqFlag,
                           size_t seqStart, size_t seqSize) {
-  value = Matrix::create(input.value->getData() + offset, height, width, trans,
-                         useGpu);
+  if (input.value) {
+    value = Matrix::create(input.value->getData() + offset * width,
+                           height, width, trans, useGpu);
+  }
+  if (input.ids) {
+    ids = IVector::create(input.ids->getData() + offset, height, useGpu);
+  }
   if (input.grad) {
-    grad = Matrix::create(input.grad->getData() + offset, height, width, trans,
-                          useGpu);
+    grad = Matrix::create(input.grad->getData() + offset * width,
+                          height, width, trans, useGpu);
   }
   if (seqFlag) {
     sequenceStartPositions = std::make_shared(
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 81cd117fc45cfa34da0810b01c5a710d9ce5950b..81ff9029bc4c8fca7adbabd7ae65caf7ac2f3c2a 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -177,11 +177,11 @@ struct Argument {
   }
 
   /**
-   * @brief (value, grad, sequenceStartPositions) of output are subset of
+   * @brief (value, ids, grad, sequenceStartPositions) of output are subset of
   * input. Note that, output share the same memory of input.
   *
   * @param input[in] input
-   * @param offset[in] offset of input.value
+   * @param offset[in] offset in terms of rows
   * @param height[in] height of output.value
   * @param width[in] width of output.value
   * @param useGpu[in]
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 76b0db546b18ba731a9dd7cb74aa0bacd0a268cb..dabf8e2953054356278a81e54ae7a7d15e358af9 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -216,7 +216,7 @@ def check_input(input):
     """
 
     if isinstance(input, LayerOutput):
-        return [LayerOutput]
+        return [input]
     assert isinstance(input, list)
     for inp in input:
         assert isinstance(inp, LayerOutput)
@@ -764,7 +764,7 @@ def print_layer(input, name=None):
     :type input: LayerOutput|list|tuple
     :return: No return
     """
-    check_input(input)
+    input = check_input(input)
     Layer(
         name=name,