diff --git a/.travis.yml b/.travis.yml index 376c693602b56fe719decfeb41c217497e143e12..8c8c6699d3d9abddd65a3a224c2bceedc7d88348 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ before_install: # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - - pip install rarfile + - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 372272a53c12c314fc80eebbce5eae9fcabc55ba..cb330ea5e1b914587a725c9b90a33053f3fbbc3d 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,16 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +kmax_sequence_score +------------------- +.. autoclass:: paddle.v2.layer.kmax_sequence_score + :noindex: + +sub_nested_seq +-------------- +.. autoclass:: paddle.v2.layer.sub_nested_seq + :noindex: + Reshaping Layers ================ diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 95cad835b11816f4d2e256c2abd662a545a5bad2..673948dfe7928240817b552141ec9bc2f8a672b7 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,15 +13,11 @@ # serve to show the default. 
import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b477f0120c4fa0544012080b7cfb8572d3c44b04..b6b50b7dcd5647b50a13703160489323ed90a1b4 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,15 +13,11 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f6ad5b2e4258553fc1a4eeb869b9d4d02cae9e26..33e6baf818a728d7bf50ba110274d60000dcc22e 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -40,11 +40,12 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - fill_zeros_like_op - recurrent_op) + fc_op + sgd_op + add_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + fill_zeros_like_op) endif(WITH_PYTHON) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index b2813da83d9e4c525e66bb1f79b28769627eaec2..6c26183818a9d6996e3d3ce2af74ba36f4711eca 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -260,12 +260,6 @@ class OpRegistry { return 
CreateOp(op_desc.type(), inputs, outputs, attrs); } - static bool SupportGPU(const std::string& op_type) { - OperatorWithKernel::OpKernelKey key; - key.place_ = platform::GPUPlace(); - return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0; - } - static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 03fabff79b637299f8e133aab29ccb0e145379cf..c324fa6702de1eabab3f75cbf4e6568c99b60470 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -88,6 +88,8 @@ class OperatorBase { virtual bool IsNetOp() const { return false; } + virtual bool SupportGPU() const { return false; } + /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); @@ -308,7 +310,7 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; - void InferShape(const Scope& scope) const { + void InferShape(const Scope& scope) const override { InferShape(InferShapeContext(this, scope)); } @@ -324,6 +326,12 @@ class OperatorWithKernel : public OperatorBase { return g_all_op_kernels; } + bool SupportGPU() const override { + OperatorWithKernel::OpKernelKey key; + key.place_ = platform::GPUPlace(); + return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0; + } + protected: virtual void InferShape(const InferShapeContext& ctx) const = 0; }; diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 26fdf2650b0f3c1739d8fa7e2c8514fa88bca253..915ffb1c007c24eca457a0c646378bc191a15810 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -39,6 +39,7 @@ USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP(uniform_random); namespace paddle { namespace framework { @@ -56,6 +57,26 @@ void ExposeOperator(ClassType &m) { [](const 
typename ClassType::type &op) -> std::vector { return op.outputs_; }) + .def("inputs", + [](const typename ClassType::type &op) -> std::vector { + return op.inputs_; + }) + .def("support_gpu", &ClassType::type::SupportGPU) + .def("temp_outputs", + [](const typename ClassType::type &op) -> std::vector { + auto iter = op.attrs_.find("temporary_index"); + std::vector ret; + if (iter == op.attrs_.end()) { + return ret; + } else { + auto tmp_idx = boost::get>(iter->second); + ret.reserve(tmp_idx.size()); + for (auto &index : tmp_idx) { + ret.push_back(op.outputs_.at(index)); + } + return ret; + } + }) .def("__str__", &ClassType::type::DebugString); } @@ -201,8 +222,6 @@ All parameter, weight, gradient are variables in Paddle. return OpRegistry::CreateOp(desc); }); - operator_base.def_static("support_gpu", &OpRegistry::SupportGPU); - operator_base.def("backward", [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ce591d4762466e1ed4b2970cb9cae9203bc0a2b --- /dev/null +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" + +namespace paddle { + +class KmaxSeqScoreLayer : public Layer { +private: + MatrixPtr scores_; + size_t beamSize_; + void kmaxScorePerSeq(const real* score, + real* sortedRes, + const ICpuGpuVectorPtr seqStartPos); + +public: + explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer); + +bool KmaxSeqScoreLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + bool ret = Layer::init(layerMap, parameterMap); + CHECK_EQ(1U, inputLayers_.size()); + + beamSize_ = config_.beam_size(); + CHECK_GE(beamSize_, 1U); + + setNeedSequenceInfo(false); + setNeedGradient(false); + return ret; +} + +void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores, + real* sortedIds, + const ICpuGpuVectorPtr seqStartPos) { + int* starts = seqStartPos->getMutableData(false); + std::vector indices; + for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) { + int seqLen = starts[i + 1] - starts[i]; + int k = std::min(static_cast(beamSize_), seqLen); + + indices.resize(seqLen, 0); + std::iota(begin(indices), end(indices), 0.); + std::vector tmpScore(scores + starts[i], scores + starts[i + 1]); + std::partial_sort( + begin(indices), + begin(indices) + k, + end(indices), + [&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; }); + memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real)); + } +} + +void KmaxSeqScoreLayer::forward(PassType passType) { + Layer::forward(passType); + + const Argument& input = getInput(0); + const MatrixPtr inputScore = getInputValue(0); + + CHECK(input.hasSeq() || input.hasSubseq()) + << "input of " << getName() + << " must be a sequence or a nested sequence."; + CHECK_EQ(input.value->getWidth(), 1UL) + << "input of " << getName() 
+ << " is score over a sequence or a nested sequence, so its width " + << " must be 1."; + + if (useGpu_) { + // this Layer runs only on CPU; if the model is running on GPU, + // then copy the input to this layer from GPU to CPU. + Matrix::resizeOrCreate(scores_, + inputScore->getHeight(), + 1, + false /* trans */, + false /* useGpu */); + scores_->copyFrom(*inputScore); + } else { + scores_ = inputScore; + } + + Matrix::resizeOrCreate( + output_.value, + input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), + beamSize_, + false, + false); + output_.value->one(); + output_.value->mulScalar(-1.); + + kmaxScorePerSeq(scores_->getData(), + output_.value->getData(), + input.hasSubseq() ? input.subSequenceStartPositions + : input.sequenceStartPositions); +} + +void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..76f587fff760d9eb9c2a8eeed53abf4d42e90834 --- /dev/null +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -0,0 +1,176 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SubNestedSequenceLayer : public Layer { +public: + explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + /* + * This function generates the indices of rows in a batch according to the + * indices of selected sub-sequence in each sequence. + * + * Examples: + * selectedIndices: + * [ + * [0, 1, -1], + * [0, 1, 2], + * [0, -1, -1], + * [0, 2, 3], + * ] + * inputSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + * + * the output is saved to private member rowIndice_; + * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20, + * 21,23,24,25,26,27] + */ + + void calSelectedCols(const MatrixPtr selectedIndices, + const std::vector>& inputSeqInfo); + + // if the second input of this layer is on GPU memory, copy it to CPU memory. + MatrixPtr selIdsCpu_; + + // reorganized sequenceStartPositions and subSequenceStartPositions + // into a 2d vector to facilitate the sequence selection process. + std::vector> inputSeqInfoVec_; + + // the final selected row indices in a batch; on CPU rowIndice_ and + // selectedRows_ share the same memory, on GPU rowIndice_ is a copy. 
+ IVectorPtr rowIndice_; + std::vector selectedRows_; +}; + +REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer); + +bool SubNestedSequenceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_EQ(2U, inputLayers_.size()); + setNeedSequenceInfo(false); + return true; +} + +void SubNestedSequenceLayer::calSelectedCols( + const MatrixPtr selectedIndices, + const std::vector>& inputSeqInfo) { + selectedRows_.clear(); + + std::vector outSeqStartInfo(1, 0); + std::vector outSubSeqStartInfo(1, 0); + + size_t seqNum = selectedIndices->getHeight(); + size_t beamSize = selectedIndices->getWidth(); + for (size_t i = 0; i < seqNum; ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (selectedIndices->getElement(i, j) == -1.) break; + int selSubSeqIdx = selectedIndices->getElement(i, j); + CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); + + size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - + inputSeqInfoVec_[i][selSubSeqIdx]; + for (size_t k = 0; k < subSeqLen; ++k) + selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k); + outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen); + } + outSeqStartInfo.push_back(outSubSeqStartInfo.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. 
+ ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartInfo.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartInfo.data(), outSeqStartInfo.size(), false); + + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false); +} + +void SubNestedSequenceLayer::forward(PassType passType) { + Layer::forward(passType); + + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " + << "must be a nested sequence."; + const MatrixPtr selectedIndices = getInputValue(1); + CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight()); + + if (dynamic_cast(selectedIndices.get())) { + /* + * Currently, the second input for this layer is generated by + * kmax_sequence_score_layer whose output is always stored on CPU, + * or a data_layer which can be on GPU. + * + * If the second input is on GPU, copy it to CPU memory, because this + * input always uses very little memory, and operations related to it are + * all logic control, not computations. 
+ */ + Matrix::resizeOrCreate(selIdsCpu_, + selectedIndices->getHeight(), + selectedIndices->getWidth(), + false /* trans */, + false /* useGpu */); + selIdsCpu_->copyFrom(*selectedIndices); + } else { + selIdsCpu_ = selectedIndices; + } + + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + + resetOutput(selectedRows_.size(), getSize()); + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SubNestedSequenceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 5511ab6b8bb05108e76cc0913264d864d2fecf5b..209d0ab9c8d7e8463c8636b1412622a94f359fb1 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -66,6 +66,16 @@ add_unittest_without_exec(test_BatchNorm add_test(NAME test_BatchNorm COMMAND test_BatchNorm) + + +################# test_KmaxSeqScore ####################### +add_unittest_without_exec(test_KmaxSeqScore + test_KmaxSeqScore.cpp + LayerGradUtil.cpp) + +add_test(NAME test_KmaxSeqScore + COMMAND test_KmaxSeqScore) + ################## test_Evaluator ####################### add_unittest(test_Evaluator test_Evaluator.cpp) diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f958b4974d45ef65f8f374148a31ad3a6ce7632f --- /dev/null +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" +#include "paddle/utils/GlobalConstants.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_bool(use_gpu); +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +vector randSampling(int range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + return num; +} + +void genRandomSeqInfo(vector& seqStartPosition, + vector& subSeqStartPosition) { + const int maxSeqNum = 100; + // generate random start position information + int seqNum = 1 + (rand() % maxSeqNum); + seqStartPosition.resize(seqNum + 1, 0); + subSeqStartPosition.resize(1, 0); + + for (int i = 0; i < seqNum; ++i) { + int subSeqLen = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqLen; ++j) + subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen); + seqStartPosition[i + 1] = subSeqStartPosition.back(); + } +} + +void genRandomGroundTruth(real* values, + vector>& groundTruth, + vector& startPos, + size_t beamSize) { + groundTruth.resize(startPos.size() - 1, vector(beamSize, -1)); + for (size_t i = 0; i < startPos.size() - 1; ++i) { + int seqLen = startPos[i + 1] - startPos[i]; + vector pos = + randSampling(seqLen, min(static_cast(beamSize), seqLen)); + for (size_t j = 0; j < pos.size(); ++j) { + 
groundTruth[i][j] = pos[j]; + values[startPos[i] + pos[j]] = 1.; + } + } +} + +void checkLayerOut(vector> groundTruth, + real* layerOut, + size_t beamSize) { + for (size_t i = 0; i < groundTruth.size(); ++i) { + int begPos = i * beamSize; + vector tmp(layerOut + begPos, layerOut + begPos + beamSize); + sort(begin(tmp), end(tmp)); + sort(begin(groundTruth[i]), end(groundTruth[i])); + for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]); + } +} + +TEST(Layer, kmaxSeqScoreLayer) { + const size_t maxBeamSize = 100; + int beamSize = 1 + (rand() % maxBeamSize); + + vector seqStartPosition; + vector subSeqStartPosition; + genRandomSeqInfo(seqStartPosition, subSeqStartPosition); + MatrixPtr inValue = + Matrix::create(subSeqStartPosition.back(), 1, false, false); + + for (auto hasSubseq : {false, true}) { + vector> groundTruth; + inValue->randomizeUniform(); + genRandomGroundTruth(inValue->getData(), + groundTruth, + hasSubseq ? subSeqStartPosition : seqStartPosition, + beamSize); + + for (auto useGpu : {false, true}) { + TestConfig config; + config.layerConfig.set_type("kmax_seq_score"); + config.layerConfig.set_beam_size(beamSize); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "scores", + inValue, + seqStartPosition, + subSeqStartPosition}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition}); + } + config.layerConfig.add_inputs(); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer( + config, + &dataLayers, + &datas, + &layerMap, + "kmax_seq_score", + 100 /* actually this parameter is unused in self-defined input*/, + false, + useGpu); + // test layer initialize + std::vector parameters; + LayerPtr kmaxSeqScoreLayer; + FLAGS_use_gpu = useGpu; + initTestLayer(config, &layerMap, ¶meters, &kmaxSeqScoreLayer); + kmaxSeqScoreLayer->forward(PASS_TRAIN); + + const MatrixPtr outValue = 
kmaxSeqScoreLayer->getOutputValue(); + CHECK_EQ(outValue->getHeight(), + hasSubseq ? subSeqStartPosition.size() - 1 + : seqStartPosition.size() - 1); + CHECK_EQ(outValue->getWidth(), beamSize); + checkLayerOut(groundTruth, outValue->getData(), beamSize); + } + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand((size_t)(time(NULL))); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index fe11278f41c0118ee0bdb34f17fbf9602e0fa76b..0f312b6ca50bc1e6317251ba785f1c61a224b54e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) { } } +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +TEST(Layer, SubNestedSequenceLayer) { + // layer size is not crucial for this layer, + // so use a small layer size in unittest + const int layerSize = 4; + + const int maxSeqNum = 50; + const int maxSeqLen = 50; + const int maxBeamSize = 32; + + srand((size_t)(time(NULL))); + int beamSize = 1 + (rand() % maxBeamSize); + + TestConfig config; + config.layerConfig.set_type("sub_nested_seq"); + config.layerConfig.set_name("sub_nested_seq_layer"); + config.layerConfig.set_size(layerSize); + + int seqNum = 1 + (rand() % maxSeqNum); + + // sequence information for the first input, it is a nested sequence + vector seqStartPos(seqNum + 1, 0); + vector subSeqStartPos(1, 0); + + // selected indices + MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false); + selectedIndices->one(); + selectedIndices->mulScalar(-1.); + real* indicesData = selectedIndices->getData(); + + for (int i = 0; i < seqNum; ++i) { + 
int subSeqNum = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqNum; ++j) { + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % maxSeqLen))); + } + vector selSeqs = + randSampling(static_cast(subSeqNum), min(beamSize, subSeqNum)); + memcpy(indicesData + (i * beamSize), + selSeqs.data(), + selSeqs.size() * sizeof(real)); + seqStartPos[i + 1] = subSeqStartPos.back(); + } + + MatrixPtr seqInputPtr = + Matrix::create(seqStartPos.back(), layerSize, false, false); + seqInputPtr->randomizeUniform(); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "nested_seq_input", + seqInputPtr, + seqStartPos, + subSeqStartPos}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "sub_nested_seq", + /* batchSize */ seqNum, + /* trans */ false, + /* useGpu*/ useGpu, + /* useWeight */ false); + } +} + TEST(Layer, ClipLayer) { const size_t batchSize = 128; const size_t size = 512; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6f6feb49a08041bd49b108b962edee519cd49bc5..9e4026d1c66ccd30ecfc37b3e819241cb85b1a1a 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -67,3 +67,5 @@ op_library(fc_op op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) +op_library(uniform_random_op + SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 8ae4bfa16c14845991ff2886527471e21c7377d4..7c03fbd9e9ed295e2b71d80ca533645d250f2244 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -70,9 +70,10 @@ OnehotCrossEntropy Operator. 
namespace ops = paddle::operators; REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, ops::OnehotCrossEntropyOpMaker); -REGISTER_OP_CPU_KERNEL( - onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); +REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); +REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad, + ops::OnehotCrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL( onehot_cross_entropy_grad, ops::OnehotCrossEntropyGradientOpKernel); diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 15b3eba41e7dfa165a7ea8de2c67c0205021a3a9..792b336675fc97659d9a23358cf3d48ede56e54e 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -59,6 +59,15 @@ class NetOp : public framework::OperatorBase { } } + bool SupportGPU() const override { + for (auto& op : ops_) { + if (!op->SupportGPU()) { + return false; + } + } + return true; + } + /** * @brief Add an operator by ptr */ diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..405b84b76d2e24db25d2ff16e99495f2f132ef09 --- /dev/null +++ b/paddle/operators/uniform_random_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class CPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::uniform_real_distribution dist( + static_cast(context.op_.GetAttr("min")), + static_cast(context.op_.GetAttr("max"))); + for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) { + data[i] = dist(engine); + } + } +}; + +class UniformRandomOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + "uniform_random's min must less then max"); + auto* tensor = ctx.Output(0); + auto dims = GetAttr>("dims"); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + UniformRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "The output tensor of uniform random op"); + AddComment(R"DOC(Uniform random operator. + +Used to initialize tensor with uniform random generator. +)DOC"); + AddAttr>("dims", "the dimension of random tensor"); + AddAttr("min", "Minimum value of uniform random").SetDefault(-1.0f); + AddAttr("max", "Maximun value of uniform random").SetDefault(1.0f); + AddAttr("seed", + "Random seed of uniform random. 
" + "0 means generate a seed by system") + .SetDefault(0); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(uniform_random, paddle::operators::UniformRandomOp, + paddle::operators::UniformRandomOpMaker); +REGISTER_OP_CPU_KERNEL(uniform_random, + paddle::operators::CPUUniformRandomKernel); diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..f1a63e52ec0d3d46a505a89d7d7916bf93a58221 --- /dev/null +++ b/paddle/operators/uniform_random_op.cu @@ -0,0 +1,70 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template +struct UniformGenerator { + T min_, max_; + unsigned int seed_; + + __host__ __device__ UniformGenerator(T min, T max, int seed) + : min_(min), max_(max), seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(min_, max_); + rng.discard(n); + return dist(rng); + } +}; + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. 
+template +class GPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + if (seed == 0) { + seed = std::random_device()(); + } + T min = static_cast(context.op_.GetAttr("min")); + T max = static_cast(context.op_.GetAttr("max")); + thrust::counting_iterator index_sequence_begin(0); + ssize_t N = framework::product(tensor->dims()); + thrust::transform(index_sequence_begin, index_sequence_begin + N, + thrust::device_ptr(data), + UniformGenerator(min, max, seed)); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_GPU_KERNEL(uniform_random, + paddle::operators::GPUUniformRandomKernel); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index ef72b973c1a465a8ac03cae1070429160eac0ac1..0547ac93cd183afbcede41d280c6b4b16ed7dab1 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input, } } +void Argument::reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo) { + int* seqStarts = seqStartPos->getMutableData(false); + int* subSeqStarts = subSeqStartPos->getMutableData(false); + + int seqNum = seqStartPos->getSize() - 1; + reorganizedSeqInfo.assign(seqNum, std::vector()); /* assign(), not resize(): the output vector may still hold rows from a previous call, and the loop below appends to each row */ + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } + } +} + } // namespace paddle diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 
0ccdef802e71b659788cfd24f28ebe43e1917db1..d8d7a4398f99a2794c5d25528a7d582f5ed629ba 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -317,6 +317,30 @@ struct Argument { */ void printValueString(std::ostream& stream, const std::string& prefix = "") const; + + /** + * @brief reorganizeSeqInfo will reorganize sequenceStartPositions and + * subSequenceStartPositions into a 2 dimensional arrary: reorganizedSeqInfo. + * + * @param seqStartPos: sequenceStartPositions of an Argument. + * @param subSeqStartPos: subSequenceStartPositions of an Argument. + * @param the reorganized sequence start position information. + * + * Examples: + * seqStartPos: [0, 4, 15, 20, 28] + * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28] + * reorganizedSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + */ + static void reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo); }; } // namespace paddle diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 33fb5d84e2701c163b5d1b1bb3362ee81ebb34ea..dfcff38302703066e868c60e213f0f7cbc55a31e 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -5,15 +5,9 @@ set -e mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build -# Compile paddle binaries first -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF - -mkdir output -make -j `nproc` -find .. -name '*whl' | xargs pip install # install all wheels. -rm -rf * # Compile Documentation only. cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON +make -j `nproc` gen_proto_py make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links @@ -35,6 +29,7 @@ TARGET_BRANCH="gh-pages" SOURCE_BRANCH="master" # Clone the repo to output directory +mkdir output git clone $REPO output cd output diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 18584cafe7971bad281b498908c54780250791b7..e1cea8bd0de5394020a498725485cea025512e48 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -17,7 +17,7 @@ foreach(filename ${proto_filenames}) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${external_project_dependencies}) + DEPENDS ${ABS_FIL} protoc) endforeach() add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e57636c22fb20d5d97de760b9cc3bcde..b7b696ef0c13e1bae2e910e08d1a1ea3e45cd5d5 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2657,6 +2657,31 @@ class SubSequenceLayer(LayerBase): self.create_bias_parameter(bias, size) +@config_layer('sub_nested_seq') +class SubNestedSequenceLayer(LayerBase): + def __init__(self, name, inputs, selected_indices, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sub_nested_seq ' + 'layer is a single nested sequence.') + inputs = inputs[0] + if isinstance(selected_indices, list): + assert len(selected_indices) == 1, ( + 'the second input of ' + 'sub_nested_seq layer is a single layer which is a ' + 'set of selected indices.') + selected_indices = selected_indices[0] + + super(SubNestedSequenceLayer, self).__init__( + name, + 'sub_nested_seq', + 0, + inputs=[inputs, selected_indices], + **xargs) + input_layer0 = self.get_input_layer(0) + size = 
input_layer0.size + self.set_layer_size(size) + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, inputs, device=None): @@ -3223,6 +3248,16 @@ class CTCLayer(LayerBase): config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs') +@config_layer('kmax_seq_score') +class KmaxSeqScoreLayer(LayerBase): + def __init__(self, name, inputs, beam_size, **xargs): + super(KmaxSeqScoreLayer, self).__init__( + name, 'kmax_seq_score', 0, inputs=inputs, **xargs) + config_assert( + len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.') + self.config.beam_size = beam_size + + @config_layer('warp_ctc') class WarpCTCLayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ea5fdcc50f6abbc67fb61b7fd56c100d9f9811d0..1bc55c869601551aff5fc0311458f906385522d2 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -129,8 +129,10 @@ __all__ = [ 'prelu_layer', 'gated_unit_layer', 'crop_layer', + 'sub_nested_seq_layer', 'clip_layer', 'slice_projection', + 'kmax_sequence_score_layer', ] @@ -224,8 +226,11 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' + SUB_NESTED_SEQ = 'sub_nested_seq' CLIP_LAYER = 'clip' + KMAX_SEQ_SCORE = 'kmax_seq_score' + @staticmethod def is_layer_type(type_name): """ @@ -6088,6 +6093,53 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): size=l.config.size) +@wrap_name_default() +@layer_support() +def sub_nested_seq_layer(input, selected_indices, name=None): + """ + The sub_nested_seq_layer accepts two inputs: the first one is a nested + sequence; the second one is a set of selceted indices in the nested sequence. + + Then sub_nest_seq_layer trims the first nested sequence input according + to the selected indices to form a new output. This layer is useful in + beam training. + + The example usage is: + + .. 
code-block:: python + + sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) + + + :param input: A nested sequence. + :type input: LayerOutput + :param selected_indices: a set of sequence indices in the nested sequence. + :type input: LayerOutput + :param name: name of this layer. + :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + assert isinstance(selected_indices, LayerOutput), ( + 'The second input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + + l = Layer( + inputs=input.name, + selected_indices=selected_indices.name, + name=name, + type=LayerType.SUB_NESTED_SEQ) + return LayerOutput( + name=name, + layer_type=LayerType.SUB_NESTED_SEQ, + parents=input, + size=l.config.size) + + @wrap_name_default("clip") def clip_layer(input, min, max, name=None): """ @@ -6109,7 +6161,8 @@ def clip_layer(input, min, max, name=None): :type min: double :param max: The upper threshold for clipping. :type max: double - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ Layer( name=name, @@ -6119,3 +6172,41 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +@layer_support() +def kmax_sequence_score_layer(input, name=None, beam_size=1): + """ + This layer accepts one input which are scores over a sequence or a nested + sequence, and returns indices of beam_size sequences with highest scores. + + .. code-block:: python + + kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size) + + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. It stores scores over a sequence or a nested + sequence and its size must be 1. + :type input: LayerOutput. + :param beam_size: squence indices with top beam_size scores are returned. 
+ :type beam_size: double + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer " + "accepts only one input.") + assert input.size == 1, ( + "input of kmax_sequence_score_layer is a score" + "over a sequence or a nested sequence, so its width must be 1.") + + Layer( + name=name, + type=LayerType.KMAX_SEQ_SCORE, + inputs=[input.name], + beam_size=beam_size) + + return LayerOutput( + name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 0ffa58bc1e2088f75e7cd25c7ecdffbe270825a4..a61beb871ad064c617fa141451afcb2a5ac64854 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) +test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer +test_kmax_seq_socre_layer test_seq_select_layers) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..81bd71f68eb3f2c04ccd46ee3b77a07543395c60 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -0,0 +1,66 @@ +type: "nn" +layers { + name: "input" + type: "data" + 
size: 300 + active_type: "" +} +layers { + name: "data" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "exponential" + inputs { + input_layer_name: "data" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + beam_size: 5 +} +parameters { + name: "___fc_layer_0__.w0" + size: 128 + initial_mean: 0.0 + initial_std: 0.0883883476483 + dims: 128 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__kmax_sequence_score_layer_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "data" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_0__" + input_layer_names: "data" + output_layer_names: "__kmax_sequence_score_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..4b906b113e3c0569d5576127e100d097e4923436 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr @@ -0,0 +1,37 @@ +type: "nn" +layers { + name: "input_seq" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "input" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 300 + active_type: "" + inputs { + input_layer_name: "input_seq" + } + inputs { + input_layer_name: "input" + } +} 
+input_layer_names: "input_seq" +output_layer_names: "__sub_nested_seq_layer_0__" +sub_models { + name: "root" + layer_names: "input_seq" + layer_names: "input" + layer_names: "__sub_nested_seq_layer_0__" + input_layer_names: "input_seq" + output_layer_names: "__sub_nested_seq_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d245c5a41c793e1f02f306bfe64071bd9885906e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) + +data = data_layer(name="data", size=128) +scores = fc_layer(input=data, size=1, act=ExpActivation()) +kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) + +outputs(kmax_seq_id) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..6d1c3175ba9801d69f3f9cb9e754858253192270 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +beam_size = 5 + +data = data_layer(name='input_seq', size=300) +selected_ids = data_layer(name='input', size=beam_size) +sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) + +outputs(sub_nest_seq) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 541639ac21661529b0b1f2cc8d8fa25605052c8c..10659caa882fd3d4060f9947413a392c3b681ee8 100644 --- 
a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py) py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) +py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) @@ -21,3 +22,4 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_operator SRCS test_operator.py) +py_test(test_uniform_random_op SRCS test_uniform_random_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index cfd29932f5b46920815819c5a75d62a0138e21a2..b73c4869d14a62a951d8e45dafb14b7523355519 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,16 +1,31 @@ +import unittest + +import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -import numpy -import unittest __all__ = ['get_numeric_gradient'] +def create_op(op_type): + kwargs = dict() + for in_name in Operator.get_op_input_names(op_type): + kwargs[in_name] = in_name + for out_name in Operator.get_op_output_names(op_type): + kwargs[out_name] = out_name + + return Operator(op_type, **kwargs) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + def get_numeric_gradient(op, input_values, output_name, input_to_check, - delta=1e-2, + delta=0.005, local_scope=None): """ Get Numeric Gradient for an operator's input. 
@@ -76,6 +91,113 @@ def get_numeric_gradient(op, return gradient_flat.reshape(tensor_to_check.get_dims()) +class GradientChecker(unittest.TestCase): + def __is_close(self, numeric_grads, scope, max_relative_error): + for name in numeric_grads: + op_grad = numpy.array( + scope.find_var(grad_var_name(name)).get_tensor()) + is_close = numpy.allclose( + numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) + if not is_close: + return False + return True + + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: output name that used to + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. 
+ :return: + """ + if no_grad_set is None: + no_grad_set = set() + + tmp_outs = forward_op.temp_outputs() + no_tmp_out = filter(lambda name: name not in tmp_outs, + forward_op.outputs()) + if len(no_tmp_out) != 1: + raise ValueError("non temp out_names should be 1") + + in_names = forward_op.inputs() + for no_grad in no_grad_set: + if no_grad not in in_names: + raise ValueError("no_grad should be in in_names") + + backward_op = core.Operator.backward(forward_op, no_grad_set) + + places = [core.CPUPlace()] + if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): + places.append(core.GPUPlace(0)) + + numeric_grad = dict() + # get numeric gradient + for check_name in inputs_to_check: + numeric_grad[check_name] = \ + get_numeric_gradient(forward_op, input_vars, output_name, check_name) + + # get operator gradient according to different device + for place in places: + scope = core.Scope() + ctx = core.DeviceContext.create(place) + + # create input var and set value + for name, value in input_vars.iteritems(): + if name not in in_names: + raise ValueError(name + " not in op.inputs_") + var = scope.new_var(name).get_tensor() + var.set_dims(value.shape) + var.set(value, place) + + # create output var + for out_name in forward_op.outputs(): + scope.new_var(out_name).get_tensor() + + # infer the shape of output var and compute/set value of output var + forward_op.infer_shape(scope) + forward_op.run(scope, ctx) + + # create output grad var + # set shape as the output var + # set value of this grad to ones + for name in forward_op.outputs(): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = 1.0 * numpy.ones(out_tensor.shape()) + grad_tensor.set(data, place) + + # create input grad var + for name in backward_op.outputs(): + scope.new_var(name).get_tensor() + + # infer the shape of input gradient var and compute/set it's value + # with backward op 
+ backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + if isinstance(place, core.CPUPlace): + msg = "CPU kernel gradient is not close to numeric gradient" + else: + if isinstance(place, core.GPUPlace): + msg = "GPU kernel gradient is not close to numeric gradient" + else: + raise ValueError("unknown place " + type(place)) + self.assertTrue( + self.__is_close(numeric_grad, scope, max_relative_error), msg) + + if __name__ == '__main__': class GetNumericGradientTest(unittest.TestCase): @@ -87,4 +209,28 @@ if __name__ == '__main__': arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) + def test_softmax_op(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - numpy.max(x) + exps = numpy.exp(shiftx) + return exps / numpy.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = numpy.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + softmax_op = Operator("softmax", X="X", Y="Y") + + X = numpy.random.random((2, 2)).astype("float32") + Y = numpy.apply_along_axis(stable_softmax, 1, X) + dY = numpy.ones(Y.shape) + dX = label_softmax_grad(Y, dY) + + arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') + numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + unittest.main() diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index da6bed0fcd690d5a7f53f44d0181c75f12e5d074..dd65e0f2dc23d3f657ff16c55fb297dae210b2d7 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -1,6 +1,5 @@ -import paddle.v2.framework.core as core -import unittest import numpy +import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator @@ -24,7 +23,7 @@ class OpTestMeta(type): scope = core.Scope() kwargs = dict() places = [core.CPUPlace()] - if 
core.is_compile_gpu() and core.Operator.support_gpu(self.type): + if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: @@ -53,6 +52,8 @@ class OpTestMeta(type): kwargs[attr_name] = self.attrs[attr_name] op = Operator(self.type, **kwargs) + if isinstance(place, core.GPUPlace) and not op.support_gpu(): + return op.infer_shape(scope) diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index b26e25d58b59bd1cb16e9ba2a1cccd27799b15f2..4815192e255c6e0429db3f50918a76a773b30131 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,9 +1,10 @@ import unittest import numpy from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op -class TestSGD(unittest.TestCase): +class TestCrossEntropy(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): @@ -20,7 +21,18 @@ class TestSGD(unittest.TestCase): self.outputs = {'Y': numpy.array(Y).astype("float32")} -# TODO(superjom) add gradient check +class CrossEntropyGradOpTest(GradientChecker): + def test_softmax_grad(self): + op = create_op("onehot_cross_entropy") + batch_size = 100 + class_num = 10 + inputs = { + "X": numpy.random.uniform( + 0.1, 1.0, [batch_size, class_num]).astype("float32"), + "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") + } + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index d20e085b8e43488480edf07b6cd4edcd861883f3..e670d93653e07d35e5019c9daac45c214eddf367 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,9 +1,8 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import 
Operator +from gradient_checker import GradientChecker, create_op from op_test_util import OpTestMeta @@ -25,62 +24,11 @@ class TestSoftmaxOp(unittest.TestCase): } -class TestSoftmaxGradOp(unittest.TestCase): - def test_softmax_grad(self): - op = Operator('softmax', X="X", Y="Y") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "softmax_grad") - expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - batch_size = 3 - class_num = 5 - # Initialize X and add 1e-2 for numerical stability - Y = np.random.rand(batch_size, class_num).astype(np.float32) - Y = Y + 1e-2 - dY = np.random.rand(batch_size, class_num).astype(np.float32) - - # Reference implementation of cross entropy with soft labels - def label_softmax_grad(Y, dY): - dX = Y * 0.0 - for i in range(batch_size): - d = np.dot(Y[i, :], dY[i, :]) - dX[i, :] = Y[i, :] * (dY[i, :] - d) - return dX - - expected = label_softmax_grad(Y, dY) - - scope = core.Scope() - places = [] - places.append(core.CPUPlace()) - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - y = scope.new_var("Y") - y_tensor = y.get_tensor() - y_tensor.set_dims([batch_size, class_num]) - y_tensor.alloc_float(place) - y_tensor.set(Y, place) - - dy = scope.new_var("Y@GRAD") - dy_tensor = dy.get_tensor() - dy_tensor.set_dims([batch_size, class_num]) - dy_tensor.alloc_float(place) - dy_tensor.set(dY, place) - - x = scope.new_var("X") - dx = scope.new_var("X@GRAD") - - tensor = scope.find_var("X@GRAD").get_tensor() - backward_op.infer_shape(scope) - self.assertEqual([batch_size, class_num], tensor.shape()) - - ctx = core.DeviceContext.create(place) - backward_op.run(scope, ctx) - actual = np.array(tensor) - - np.testing.assert_almost_equal(actual, expected, decimal=3) +class SoftmaxGradOpTest(GradientChecker): + def test_softmax(self): + op = create_op("softmax") + inputs = {"X": np.random.uniform(0.1, 1, [10, 
10]).astype("float32")} + self.check_grad(op, inputs, set("X"), "Y") if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py new file mode 100644 index 0000000000000000000000000000000000000000..c3d2bb44da3977c0899b2609a8efe15b7e1789f2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -0,0 +1,35 @@ +import unittest +from paddle.v2.framework.op import Operator +import paddle.v2.framework.core as core +import numpy + + +class UniformRandomTest(unittest.TestCase): + def test_uniform_random_cpu(self): + self.uniform_random_test(place=core.CPUPlace()) + + def test_uniform_random_gpu(self): + if core.is_compile_gpu(): + self.uniform_random_test(place=core.GPUPlace(0)) + + def uniform_random_test(self, place): + scope = core.Scope() + scope.new_var("X").get_tensor() + + op = Operator( + "uniform_random", + Out="X", + dims=[1000, 784], + min=-5.0, + max=10.0, + seed=10) + + op.infer_shape(scope) + ctx = core.DeviceContext.create(place) + op.run(scope, ctx) + tensor = numpy.array(scope.find_var("X").get_tensor()) + self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) + + +if __name__ == '__main__': + unittest.main()