diff --git a/.travis.yml b/.travis.yml index 376c693602b56fe719decfeb41c217497e143e12..8c8c6699d3d9abddd65a3a224c2bceedc7d88348 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ before_install: # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - - pip install rarfile + - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/Dockerfile b/Dockerfile index 156ad3552b2c4ff90b405c35c66d44117c2624a4..06a3d8930769bca2599a7afedb3683b2207cb302 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,17 +38,16 @@ RUN apt-get update && \ RUN pip --no-cache-dir install 'numpy>=1.12.0' # Install Go and glide -RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ - tar -C /usr/local -xzf go.tgz && \ +RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ mkdir /root/gopath && \ mkdir /root/gopath/bin && \ - mkdir /root/gopath/src && \ - rm go.tgz + mkdir /root/gopath/src ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin # install glide -RUN curl -q https://glide.sh/get | sh +RUN curl -s -q https://glide.sh/get | sh # git credential to skip password typing RUN git config --global credential.helper store diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 372272a53c12c314fc80eebbce5eae9fcabc55ba..cb330ea5e1b914587a725c9b90a33053f3fbbc3d 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,16 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +kmax_sequence_score +------------------- +.. autoclass:: paddle.v2.layer.kmax_sequence_score + :noindex: + +sub_nested_seq +-------------- +.. autoclass:: paddle.v2.layer.sub_nested_seq + :noindex: + Reshaping Layers ================ diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md index 3692a5248a355cfcfd1cfd0911d43d65166921b1..0c10e782808ca6456347ec54cb5e921162731ede 100644 --- a/doc/design/releasing_process.md +++ b/doc/design/releasing_process.md @@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程: * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 + * 编译这个版本的python wheel包,并发布到pypi。 + * 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。 + * pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。 + * 上传方法: + ``` + cd build/python + pip install twine + twine upload dist/[package to upload] + ``` 4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 6. 协同完成Release Note的书写 diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 95cad835b11816f4d2e256c2abd662a545a5bad2..673948dfe7928240817b552141ec9bc2f8a672b7 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,15 +13,11 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b477f0120c4fa0544012080b7cfb8572d3c44b04..b6b50b7dcd5647b50a13703160489323ed90a1b4 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,15 +13,11 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 31f778d53ba2867d61be1f87db8189981ace9e2b..04659639910b3b073c1d15f419aa6996360519e0 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -31,13 +31,17 @@ add_dependencies(framework_py_proto framework_py_proto_init) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) + +if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - fill_zeros_like_op - recurrent_op) + fc_op + sgd_op + add_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + fill_zeros_like_op) +endif(WITH_PYTHON) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 10a3f49810f75b3bfe32b20c64c94295368b0b49..8f39b79cf729774d9057b3b68d71d2b4907da77a 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/framework/backward.h" + #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 8e85a2510fe9bd4f8bbd67d12b76232a4f268e61..653b5693e8da0a977c28251e6aa55c5be820c7a5 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -17,16 +17,21 @@ #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace framework { +using OperatorBase = framework::OperatorBase; +using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; +using OpProto = framework::OpProto; +using OpAttrChecker = framework::OpAttrChecker; +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; + class EmptyOp : public OperatorBase { public: void InferShape(const Scope &scope) const override {} - void Run(const Scope &scope, - const platform::DeviceContext &dev_ctx) const override {} + void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { @@ -71,7 +76,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { } }; -class FcOp : public ops::NetOp { +class FcOp : public operators::NetOp { public: void Init() override { AddOp(OpRegistry::CreateOp("mul", @@ -145,6 +150,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { } // namespace paddle namespace f = paddle::framework; +namespace ops = paddle::operators; using EnforceNotMet = paddle::platform::EnforceNotMet; REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker); REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp); diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 9123e9b56fd068d7df59160e8987bc51bbf511df..db23fd7bf938a1b2e97347d46c2f58efb9773009 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -204,12 +204,6 @@ class OpRegistry { return CreateOp(op_desc.type(), inputs, outputs, attrs); } - static bool SupportGPU(const std::string& op_type) { - OperatorWithKernel::OpKernelKey key; - key.place_ = platform::GPUPlace(); - return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0; - } - static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index ec498ce3bd8b5d40c3c3ad081c217c4c7e8dd593..698ff5f36ddf0a621b6cf8d07d9f57f9074854b8 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -87,6 +87,8 @@ class OperatorBase { virtual bool IsNetOp() const { return false; } + virtual bool SupportGPU() const { return false; } + /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); @@ -160,14 +162,14 @@ class OperatorContext { template const T* Input(const std::string& name) const { auto var = InputVar(name); - PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name); + PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name); return &var->Get(); } template T* Output(const std::string& name) const { auto var = OutputVar(name); - PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name); + PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name); return var->GetMutable(); } @@ -179,9 +181,9 @@ class OperatorContext { std::transform(names.begin(), names.end(), std::back_inserter(res), [&](const std::string& sub_name) { auto var = scope_.FindVar(sub_name); - PADDLE_ENFORCE(var != nullptr, - "MultiInput(%s:%s) should not be nullptr", - name, sub_name); + PADDLE_ENFORCE_NOT_NULL( + var, "MultiInput(%s:%s) should not be nullptr", name, + sub_name); return &var->Get(); }); return res; @@ -195,9 +197,9 @@ class OperatorContext { std::transform(names.begin(), names.end(), std::back_inserter(res), [&](const std::string& sub_name) { auto var = scope_.FindVar(sub_name); - PADDLE_ENFORCE(var != nullptr, - "MultiOutput(%s:%s) should not be nullptr", - name, sub_name); + PADDLE_ENFORCE_NOT_NULL( + var, "MultiOutput(%s:%s) should not be nullptr", name, + sub_name); return var->GetMutable(); }); return res; @@ -283,7 +285,7 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; - void InferShape(const Scope& scope) const { + void InferShape(const Scope& scope) const override { InferShape(InferShapeContext(this, scope)); } @@ -299,6 +301,12 @@ class OperatorWithKernel : public OperatorBase { return g_all_op_kernels; } + bool SupportGPU() const override { + OperatorWithKernel::OpKernelKey key; + key.place_ = platform::GPUPlace(); + return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0; + } + protected: virtual void InferShape(const InferShapeContext& ctx) const = 0; }; diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index bba3af70258d9f037231f7984769ddb72eb373cb..3b3f33c8e212e9005beccdedf1d6b33e8c4a45ce 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -18,11 +18,8 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" #include "paddle/framework/tensor_py.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "pybind11/numpy.h" @@ -42,8 +39,12 @@ USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP(uniform_random); namespace paddle { namespace framework { + +using Tensor = framework::Tensor; + template void ExposeOperator(ClassType &m) { m.def("infer_shape", &ClassType::type::InferShape) @@ -130,8 +131,8 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](Variable &self) -> ops::NetOp * { - return self.GetMutable(); + [](Variable &self) -> operators::NetOp * { + return self.GetMutable(); }, py::return_value_policy::reference); @@ -202,8 +203,6 @@ All parameter, weight, gradient are variables in Paddle. return OpRegistry::CreateOp(desc); }); - operator_base.def_static("support_gpu", &OpRegistry::SupportGPU); - operator_base.def("backward", [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { @@ -212,23 +211,24 @@ All parameter, weight, gradient are variables in Paddle. ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &ops::NetOp::AddOp) - .def( - "add_op", - [](ops::NetOp &self, const std::shared_ptr &net) -> void { - self.AddOp(std::static_pointer_cast(net)); - }) - .def("complete_add_op", &ops::NetOp::CompleteAddOp) - .def("complete_add_op", - [](std::shared_ptr &self) { self->CompleteAddOp(); }); + .def("add_op", &operators::NetOp::AddOp) + .def("add_op", + [](operators::NetOp &self, + const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); + }) + .def("complete_add_op", &operators::NetOp::CompleteAddOp) + .def("complete_add_op", [](std::shared_ptr &self) { + self->CompleteAddOp(); + }); ExposeOperator(net); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 4c3b14b83d841e88683a13634c93f51c012128b6..c44df05e4b0fceed858fbf4f68eddc407a44c894 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -127,8 +127,8 @@ class Tensor { memory::PODDeleter(place)), place_(place), size_(size) { - PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.", - is_cpu_place(place_) ? "CPU" : "GPU"); + PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.", + (is_cpu_place(place_) ? "CPU" : "GPU")); } virtual size_t size() const { return size_; } diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 92621f8c18ec0d03160a23c462830d14272c7f64..8d9bec6dc9c3f0af822a0d8cd8588dc932970652 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -14,17 +14,18 @@ limitations under the License. */ #pragma once #include "paddle/memory/memcpy.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { template inline void Tensor::check_memory_size() const { - PADDLE_ENFORCE(holder_ != nullptr, - "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory."); + PADDLE_ENFORCE_NOT_NULL( + holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); + PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory."); } template @@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { template inline T* Tensor::mutable_data(platform::Place place) { static_assert(std::is_pod::value, "T must be POD"); - PADDLE_ENFORCE(product(dims_) > 0, - "Tensor's numel must be larger than zero to call " - "Tensor::mutable_data. Call Tensor::set_dim first."); + PADDLE_ENFORCE_GT(product(dims_), 0, + "Tensor's numel must be larger than zero to call " + "Tensor::mutable_data. Call Tensor::set_dim first."); /* some versions of boost::variant don't have operator!= */ size_t size = product(dims_) * sizeof(T); if (holder_ == nullptr || !(holder_->place() == place) || @@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src, template inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { check_memory_size(); - PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); - PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); - PADDLE_ENFORCE(begin_idx < end_idx, - "Begin index must be less than end index."); - PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); + PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero."); + PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound."); + PADDLE_ENFORCE_LT(begin_idx, end_idx, + "Begin index must be less than end index."); + PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1."); int base = product(dims_) / dims_[0]; Tensor dst; dst.holder_ = holder_; diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index ef1cc10b840896d9ab97f963fc12a4971cd74e1f..20276181b974bb5b3d6cb40fb5e6c1295cf1c02f 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "Tenosr holds no memory. Call Tensor::mutable_data first."; + "holder_ should not be null\nTenosr holds no memory. Call " + "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); @@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "Tenosr holds no memory. Call Tensor::mutable_data first."; + "holder_ should not be null\nTenosr holds no memory. Call " + "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ce591d4762466e1ed4b2970cb9cae9203bc0a2b --- /dev/null +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" + +namespace paddle { + +class KmaxSeqScoreLayer : public Layer { +private: + MatrixPtr scores_; + size_t beamSize_; + void kmaxScorePerSeq(const real* score, + real* sortedRes, + const ICpuGpuVectorPtr seqStartPos); + +public: + explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer); + +bool KmaxSeqScoreLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + bool ret = Layer::init(layerMap, parameterMap); + CHECK_EQ(1U, inputLayers_.size()); + + beamSize_ = config_.beam_size(); + CHECK_GE(beamSize_, 1U); + + setNeedSequenceInfo(false); + setNeedGradient(false); + return ret; +} + +void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores, + real* sortedIds, + const ICpuGpuVectorPtr seqStartPos) { + int* starts = seqStartPos->getMutableData(false); + std::vector indices; + for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) { + int seqLen = starts[i + 1] - starts[i]; + int k = std::min(static_cast(beamSize_), seqLen); + + indices.resize(seqLen, 0); + std::iota(begin(indices), end(indices), 0.); + std::vector tmpScore(scores + starts[i], scores + starts[i + 1]); + std::partial_sort( + begin(indices), + begin(indices) + k, + end(indices), + [&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; }); + memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real)); + } +} + +void KmaxSeqScoreLayer::forward(PassType passType) { + Layer::forward(passType); + + const Argument& input = getInput(0); + const MatrixPtr inputScore = getInputValue(0); + + CHECK(input.hasSeq() || input.hasSubseq()) + << "input of " << getName() + << " must be a sequence or a nested sequence."; + CHECK_EQ(input.value->getWidth(), 1UL) + << "input of " << getName() + << " is score over a sequence or a nested sequence, so its width " + << " must be 1."; + + if (useGpu_) { + // this Layer runs only in CPU, if the model is runing on GPU, + // then copy the input to this layer from GPU to CPU. + Matrix::resizeOrCreate(scores_, + inputScore->getHeight(), + 1, + false /* trans */, + false /* useGpu */); + scores_->copyFrom(*inputScore); + } else { + scores_ = inputScore; + } + + Matrix::resizeOrCreate( + output_.value, + input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), + beamSize_, + false, + false); + output_.value->one(); + output_.value->mulScalar(-1.); + + kmaxScorePerSeq(scores_->getData(), + output_.value->getData(), + input.hasSubseq() ? input.subSequenceStartPositions + : input.sequenceStartPositions); +} + +void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..76f587fff760d9eb9c2a8eeed53abf4d42e90834 --- /dev/null +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -0,0 +1,176 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SubNestedSequenceLayer : public Layer { +public: + explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + /* + * This functions generates the indices of rows in a batch according to the + * indices of selected sub-sequence in each sequence. + * + * Examples: + * selectedIndices: + * [ + * [0, 1, -1], + * [0, 1, 2], + * [0, -1, -1], + * [0, 2, 3], + * ] + * inputSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + * + * ths output is saved to private member rowIndice_; + * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + * 16,17,18,19,20,21,22,23,24,25,26,27] + */ + + void calSelectedCols(const MatrixPtr selectedIndices, + const std::vector>& inputSeqInfo); + + // if the second input of this layer is on GPU memory, copy it to CPU memory. + MatrixPtr selIdsCpu_; + + // reorganized sequenceStartPositions and subSequenceStartPositions + // into a 2d vector to facilitate the sequence selection process. + std::vector> inputSeqInfoVec_; + + // the final selected row indices in a batch, + // rowIdx_ and selectedRows_ actually share a same memory. + IVectorPtr rowIndice_; + std::vector selectedRows_; +}; + +REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer); + +bool SubNestedSequenceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_EQ(2U, inputLayers_.size()); + setNeedSequenceInfo(false); + return true; +} + +void SubNestedSequenceLayer::calSelectedCols( + const MatrixPtr selectedIndices, + const std::vector>& inputSeqInfo) { + selectedRows_.clear(); + + std::vector outSeqStartInfo(1, 0); + std::vector outSubSeqStartInfo(1, 0); + + size_t seqNum = selectedIndices->getHeight(); + size_t beamSize = selectedIndices->getWidth(); + for (size_t i = 0; i < seqNum; ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (selectedIndices->getElement(i, j) == -1.) break; + int selSubSeqIdx = selectedIndices->getElement(i, j); + CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); + + size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - + inputSeqInfoVec_[i][selSubSeqIdx]; + for (size_t k = 0; k < subSeqLen; ++k) + selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k); + outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen); + } + outSeqStartInfo.push_back(outSubSeqStartInfo.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. + ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartInfo.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartInfo.data(), outSeqStartInfo.size(), false); + + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false); +} + +void SubNestedSequenceLayer::forward(PassType passType) { + Layer::forward(passType); + + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " + << "must be a nested sequence."; + const MatrixPtr selectedIndices = getInputValue(1); + CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight()); + + if (dynamic_cast(selectedIndices.get())) { + /* + * Currently, the second input for this layer is generated by + * kmax_sequence_score_layer whose output is always stored on CPU, + * or a data_layer which canbe on GPU. + * + * If the second input is on GPU, copy it to CPU memory, because this + * input always uses very few memory, and operations related to it are + * all logic control, not computations. + */ + Matrix::resizeOrCreate(selIdsCpu_, + selectedIndices->getHeight(), + selectedIndices->getWidth(), + false /* trans */, + false /* useGpu */); + selIdsCpu_->copyFrom(*selectedIndices); + } else { + selIdsCpu_ = selectedIndices; + } + + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + + resetOutput(selectedRows_.size(), getSize()); + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SubNestedSequenceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 5511ab6b8bb05108e76cc0913264d864d2fecf5b..209d0ab9c8d7e8463c8636b1412622a94f359fb1 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -66,6 +66,16 @@ add_unittest_without_exec(test_BatchNorm add_test(NAME test_BatchNorm COMMAND test_BatchNorm) + + +################# test_KmaxSeqScore ####################### +add_unittest_without_exec(test_KmaxSeqScore + test_KmaxSeqScore.cpp + LayerGradUtil.cpp) + +add_test(NAME test_KmaxSeqScore + COMMAND test_KmaxSeqScore) + ################## test_Evaluator ####################### add_unittest(test_Evaluator test_Evaluator.cpp) diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f958b4974d45ef65f8f374148a31ad3a6ce7632f --- /dev/null +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" +#include "paddle/utils/GlobalConstants.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_bool(use_gpu); +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +vector randSampling(int range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + return num; +} + +void genRandomSeqInfo(vector& seqStartPosition, + vector& subSeqStartPosition) { + const int maxSeqNum = 100; + // generate random start position information + int seqNum = 1 + (rand() % maxSeqNum); + seqStartPosition.resize(seqNum + 1, 0); + subSeqStartPosition.resize(1, 0); + + for (int i = 0; i < seqNum; ++i) { + int subSeqLen = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqLen; ++j) + subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen); + seqStartPosition[i + 1] = subSeqStartPosition.back(); + } +} + +void genRandomGroundTruth(real* values, + vector>& groundTruth, + vector& startPos, + size_t beamSize) { + groundTruth.resize(startPos.size() - 1, vector(beamSize, -1)); + for (size_t i = 0; i < startPos.size() - 1; ++i) { + int seqLen = startPos[i + 1] - startPos[i]; + vector pos = + randSampling(seqLen, min(static_cast(beamSize), seqLen)); + for (size_t j = 0; j < pos.size(); ++j) { + groundTruth[i][j] = pos[j]; + values[startPos[i] + pos[j]] = 1.; + } + } +} + +void checkLayerOut(vector> groundTruth, + real* layerOut, + size_t beamSize) { + for (size_t i = 0; i < groundTruth.size(); ++i) { + int begPos = i * beamSize; + vector tmp(layerOut + begPos, layerOut + begPos + beamSize); + sort(begin(tmp), end(tmp)); + sort(begin(groundTruth[i]), end(groundTruth[i])); + for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]); + } +} + +TEST(Layer, kmaxSeqScoreLayer) { + const size_t maxBeamSize = 100; + int beamSize = 1 + (rand() % maxBeamSize); + + vector seqStartPosition; + vector subSeqStartPosition; + genRandomSeqInfo(seqStartPosition, subSeqStartPosition); + MatrixPtr inValue = + Matrix::create(subSeqStartPosition.back(), 1, false, false); + + for (auto hasSubseq : {false, true}) { + vector> groundTruth; + inValue->randomizeUniform(); + genRandomGroundTruth(inValue->getData(), + groundTruth, + hasSubseq ? subSeqStartPosition : seqStartPosition, + beamSize); + + for (auto useGpu : {false, true}) { + TestConfig config; + config.layerConfig.set_type("kmax_seq_score"); + config.layerConfig.set_beam_size(beamSize); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "scores", + inValue, + seqStartPosition, + subSeqStartPosition}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition}); + } + config.layerConfig.add_inputs(); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer( + config, + &dataLayers, + &datas, + &layerMap, + "kmax_seq_score", + 100 /* actually this parameter is unused in self-defined input*/, + false, + useGpu); + // test layer initialize + std::vector parameters; + LayerPtr kmaxSeqScoreLayer; + FLAGS_use_gpu = useGpu; + initTestLayer(config, &layerMap, ¶meters, &kmaxSeqScoreLayer); + kmaxSeqScoreLayer->forward(PASS_TRAIN); + + const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue(); + CHECK_EQ(outValue->getHeight(), + hasSubseq ? subSeqStartPosition.size() - 1 + : seqStartPosition.size() - 1); + CHECK_EQ(outValue->getWidth(), beamSize); + checkLayerOut(groundTruth, outValue->getData(), beamSize); + } + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand((size_t)(time(NULL))); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index fe11278f41c0118ee0bdb34f17fbf9602e0fa76b..0f312b6ca50bc1e6317251ba785f1c61a224b54e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) { } } +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +TEST(Layer, SubNestedSequenceLayer) { + // layer size is not crutial for this layer, + // so use a small layer size in unittest + const int layerSize = 4; + + const int maxSeqNum = 50; + const int maxSeqLen = 50; + const int maxBeamSize = 32; + + srand((size_t)(time(NULL))); + int beamSize = 1 + (rand() % maxBeamSize); + + TestConfig config; + config.layerConfig.set_type("sub_nested_seq"); + config.layerConfig.set_name("sub_nested_seq_layer"); + config.layerConfig.set_size(layerSize); + + int seqNum = 1 + (rand() % maxSeqNum); + + // sequence information for the first input, it is a nested sequence + vector seqStartPos(seqNum + 1, 0); + vector subSeqStartPos(1, 0); + + // selected indices + MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false); + selectedIndices->one(); + selectedIndices->mulScalar(-1.); + real* indicesData = selectedIndices->getData(); + + for (int i = 0; i < seqNum; ++i) { + int subSeqNum = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqNum; ++j) { + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % maxSeqLen))); + } + vector selSeqs = + randSampling(static_cast(subSeqNum), min(beamSize, subSeqNum)); + memcpy(indicesData + (i * beamSize), + selSeqs.data(), + selSeqs.size() * sizeof(real)); + seqStartPos[i + 1] = subSeqStartPos.back(); + } + + MatrixPtr seqInputPtr = + Matrix::create(seqStartPos.back(), layerSize, false, false); + seqInputPtr->randomizeUniform(); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "nested_seq_input", + seqInputPtr, + seqStartPos, + subSeqStartPos}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "sub_nested_seq", + /* batchSize */ seqNum, + /* trans */ false, + /* useGpu*/ useGpu, + /* useWeight */ false); + } +} + TEST(Layer, ClipLayer) { const size_t batchSize = 128; const size_t size = 512; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 2cf15ff69a7918fdf498fafaed8cbbc113a8e982..3ce18da887a1bf850b2e5c82b4c0040ac1eb3a1f 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -59,6 +59,7 @@ op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) +cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op) op_library(fc_op SRCS fc_op.cc @@ -66,3 +67,5 @@ op_library(fc_op op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) +op_library(uniform_random_op + SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 29943002acab6db42dfdad2bc32f4c8725901136..adb1c4f0412dd213f0505c4b3e96e9a17cbf6b48 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class AddOp : public OperatorWithKernel { +class AddOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), "Two input of Add Op's dimension must be same."); @@ -27,9 +27,9 @@ class AddOp : public OperatorWithKernel { } }; -class AddOpMaker : public OpProtoAndCheckerMaker { +class AddOpMaker : public framework::OpProtoAndCheckerMaker { public: - AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of add op"); AddInput("Y", "The second input of add op"); @@ -42,14 +42,17 @@ The equation is: Out = X + Y } }; -class AddOpGrad : public OperatorWithKernel { +class AddOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} }; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker); REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad); -REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel); + +REGISTER_OP_CPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 9bd08634da96c5595d6dd702ad9afafb94632b03..cec5f558cbc161124620ad4241d6bd8a5324277c 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -16,4 +16,6 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" -REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 9310c1f7edfd2059e5f31486388a17a04359be63..a7307b6818aa3d10ff215d06281e2b53196fd101 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class AddKernel : public OpKernel { +class AddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto* input0 = context.Input("X"); auto* input1 = context.Input("Y"); auto* output = context.Output("Out"); diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc index 3d52f5498323dbb7ca0ff25d038947f0ddb2017e..bf529defb20d27200a28666278db8607b986e2d5 100644 --- a/paddle/operators/add_op_test.cc +++ b/paddle/operators/add_op_test.cc @@ -14,9 +14,9 @@ limitations under the License. */ #include #define private public -#include +#include "paddle/framework/op_registry.h" + USE_OP(add_two); -// USE_OP(add_two_grad); TEST(AddOp, GetOpProto) { auto& protos = paddle::framework::OpRegistry::protos(); diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 77c8271fd4ca490afba11abc109f5c4296f0d1fd..7cb2aa4e78cdf7de98fb6488d2398ac811256f9e 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class OnehotCrossEntropyOp : public OperatorWithKernel { +class OnehotCrossEntropyOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto *X = ctx.Input("X"); auto *label = ctx.Input("label"); @@ -30,9 +30,9 @@ class OnehotCrossEntropyOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyGradientOp : public OperatorWithKernel { +class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); @@ -41,9 +41,10 @@ class OnehotCrossEntropyGradientOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker { +class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: - OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker) + OnehotCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp"); @@ -59,11 +60,14 @@ OnehotCrossEntropy Operator. } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, ops::OnehotCrossEntropyOpMaker); -REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); - +REGISTER_OP_CPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); +REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad, + ops::OnehotCrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL( onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOpKernel); + ops::OnehotCrossEntropyGradientOpKernel); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index ec73721a810fa86d65409f643401eb77248ad5de..4bbc8f093a794d46737a16488684a6a0cc25e285 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -14,3 +14,8 @@ #define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index d5e3f29332809a63908ecc896b33d2adff6abe45..b7df92c9a98ebf12b72a8d3d8e8e4e1a950f06c9 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template T tolerable_value(T x) { static_assert(std::is_floating_point::value, @@ -38,9 +40,9 @@ T tolerable_value(T x) { } template -class OnehotCrossEntropyOpKernel : public OpKernel { +class OnehotCrossEntropyOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); const T* Xdata = X->data(); const int* label_data = ctx.Input("label")->data(); @@ -61,9 +63,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel { }; template -class OnehotCrossEntropyGradientOpKernel : public OpKernel { +class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); auto dX = ctx.Output(framework::GradVarName("X")); auto dY = ctx.Input(framework::GradVarName("Y")); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc index 0eccc5fe4c481a63f6b1dca00e4cc5328c8f9834..8453267b4addac44fa098b4d9a51f0780f068376 100644 --- a/paddle/operators/fc_op.cc +++ b/paddle/operators/fc_op.cc @@ -12,11 +12,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "type_alias.h" +#include "paddle/operators/net_op.h" + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using OpRegistry = framework::OpRegistry; + class FullyConnectedOp : public NetOp { public: void Init() override { @@ -39,9 +44,10 @@ class FullyConnectedOp : public NetOp { } }; -class FullyConnectedOpMaker : public OpProtoAndCheckerMaker { +class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { public: - FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker) + FullyConnectedOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "the input of fc operator"); AddInput("W", "the weight of fc operator"); @@ -66,4 +72,5 @@ USE_OP(rowwise_add); USE_OP(sigmoid); USE_OP(softmax); +namespace ops = paddle::operators; REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 405ed219f0145ec1c3d8a315f39a134d3d9d7643..04a820b6168b0e696024ed9f8cded6a9d1e45e9d 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -42,8 +42,8 @@ The output will have the same size with input. } // namespace operators } // namespace paddle -REGISTER_OP(fill_zeros_like, paddle::operators::FillZerosLikeOp, - paddle::operators::FillZerosLikeOpMaker); +namespace ops = paddle::operators; +REGISTER_OP(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker); REGISTER_OP_CPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu index 4f1054cf47e35572dbbc51ca742994065a027919..fdbcf520a0d7b4ddfe3fc1837a21e0ce88b8e8fa 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu @@ -16,6 +16,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" +namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index dfaed2c9aaf2bf5c1a9b803fc9c8b9ea0e5c5d4e..f846c7a8ab15e2cd997564edb36660a1360227a8 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index aa5479ceaff379d99a5f7322edac0c5e69816974..2121faf15b9fcce5efd9fa8d63dbc298bb2ed527 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -17,18 +17,18 @@ limitations under the License. */ namespace paddle { namespace operators { -class MeanOp : public OperatorWithKernel { +class MeanOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputVar("X") != nullptr, - "Input of MeanOp must be initialized."); + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input of MeanOp must be initialized."); ctx.Output("Out")->Resize({1}); } }; -class MeanOpMaker : public OpProtoAndCheckerMaker { +class MeanOpMaker : public framework::OpProtoAndCheckerMaker { public: - MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); AddOutput("Out", "The output of mean op").IgnoreGradient(); @@ -36,9 +36,9 @@ class MeanOpMaker : public OpProtoAndCheckerMaker { } }; -class MeanGradOp : public OperatorWithKernel { +class MeanGradOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output("X" + framework::kGradVarSuffix) ->Resize(ctx.Input("X")->dims()); } @@ -47,7 +47,10 @@ class MeanGradOp : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker); -REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel); +REGISTER_OP_CPU_KERNEL(mean, + ops::MeanKernel); REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp); -REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel); +REGISTER_OP_CPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index 8b97b0154ccdc8c41a90f7580af829c5c8663b60..7af624d81dc5ffbb5c31b4d6f6eb8f9f8652a431 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -16,5 +16,8 @@ #include "paddle/operators/mean_op.h" -REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel); -REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(mean, + ops::MeanKernel); +REGISTER_OP_GPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 40a1e2d099acad90b1bbac50f62ea7c4f691c1b4..f3db0a29bb234948d180d964fb82057632ec4414 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -13,15 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenScalar = framework::EigenScalar; +template +using EigenVector = framework::EigenVector; + template -class MeanKernel : public OpKernel { +class MeanKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); @@ -36,9 +45,9 @@ class MeanKernel : public OpKernel { }; template -class MeanGradKernel : public OpKernel { +class MeanGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto OG = context.Input("Out" + framework::kGradVarSuffix); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index b9099ad4e3d000df45a8df1b6f8f2b27b197d744..9c570cff28e7c6da3d377482eced7eb12a5e1122 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -17,9 +17,9 @@ namespace paddle { namespace operators { -class MulOp : public OperatorWithKernel { +class MulOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto dim0 = ctx.Input("X")->dims(); auto dim1 = ctx.Input("Y")->dims(); PADDLE_ENFORCE_EQ(dim0.size(), 2, @@ -35,9 +35,9 @@ class MulOp : public OperatorWithKernel { } }; -class MulOpMaker : public OpProtoAndCheckerMaker { +class MulOpMaker : public framework::OpProtoAndCheckerMaker { public: - MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of mul op"); AddInput("Y", "The second input of mul op"); @@ -50,9 +50,9 @@ The equation is: Out = X * Y } }; -class MulOpGrad : public OperatorWithKernel { +class MulOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { LOG(INFO) << "MulGrad"; return ""; @@ -62,7 +62,8 @@ class MulOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker); REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad); -REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 1dc04c4297daed7a7861a09cf6b99446c296ffa5..43debbc21a365a15c914e60e151f7782b82080cb 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/mul_op.h" -REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index 7ecd6e8ac01c9efeabe9d2873da39503966ba8df..ab12631c03453a18fbb067e2d12c2bc332acd567 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,16 +13,21 @@ limitations under the License. */ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class MulKernel : public OpKernel { +class MulKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; @@ -40,5 +45,6 @@ class MulKernel : public OpKernel { Z.device(place) = X.contract(Y, dim_pair); } }; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 0342cf4adb831e394935083f77673611d26e2e55..4e2353aa2bd8cf20f6a7feedad71d83efd819a47 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -16,10 +16,6 @@ limitations under the License. */ #include "paddle/framework/framework.pb.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" -#include "paddle/operators/type_alias.h" -#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -64,20 +60,29 @@ class NetOp : public framework::OperatorBase { } } + bool SupportGPU() const override { + for (auto& op : ops_) { + if (!op->SupportGPU()) { + return false; + } + } + return true; + } + /** * @brief Add an operator by ptr */ void AddOp(const std::shared_ptr& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); - PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); + PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); ops_.push_back(op); } void InsertOp(size_t pos, const std::shared_ptr& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot InsertOp when this network is sealed"); - PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); - PADDLE_ENFORCE(pos <= ops_.size(), "Out of range"); + PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); + PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); ops_.insert(ops_.begin() + pos, op); } diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index eb9832dc2c7fd6368b979e742ab4eb683fdaedbe..977f3de706f8c81933ab751385f3d6f999f874b4 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -2,31 +2,27 @@ #include -#include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" - namespace paddle { namespace operators { +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; static int infer_shape_cnt = 0; static int run_cnt = 0; -class TestOp : public OperatorBase { +class TestOp : public framework::OperatorBase { public: - void InferShape(const framework::Scope& scope) const override { - ++infer_shape_cnt; - } - void Run(const framework::Scope& scope, - const paddle::platform::DeviceContext& dev_ctx) const override { + void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } + void Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const override { ++run_cnt; } }; -class EmptyOp : public OperatorBase { +class EmptyOp : public framework::OperatorBase { public: void InferShape(const Scope& scope) const override {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; template @@ -73,7 +69,7 @@ TEST(OpKernel, all) { net->Run(scope, dev_ctx); ASSERT_EQ(2, infer_shape_cnt); ASSERT_EQ(2, run_cnt); - ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); + ASSERT_THROW(net->AddOp(op2), platform::EnforceNotMet); } TEST(NetOp, insert_op) { diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 43c9aa72cd724a792c05501330d1ffee34cde632..4ed338359e62816ed2c4b58b512db8b230f2f786 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -14,17 +14,19 @@ #include "paddle/operators/recurrent_op.h" -#include #include #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/platform/enforce.h" namespace paddle { namespace operators { +using Scope = framework::Scope; +using Variable = framework::Variable; +using Tensor = framework::Tensor; + void RecurrentAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) ->GetMutable() @@ -140,10 +142,11 @@ void RecurrentOp::Init() { alg_.Init(std::move(arg)); } -class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +class RecurrentAlgorithmProtoAndCheckerMaker + : public framework::OpProtoAndCheckerMaker { public: - RecurrentAlgorithmProtoAndCheckerMaker(OpProto* proto, - OpAttrChecker* op_checker) + RecurrentAlgorithmProtoAndCheckerMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { const auto& name = RecurrentOp::kArgName; // inputs and outputs stored in proto diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 32c6c2dd4efa85359b4e95471e8ba09e56afec57..7e4770630ed2a49214194689aa489e6ab8e476da 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -18,7 +18,9 @@ namespace paddle { namespace operators { namespace rnn { -namespace fmw = paddle::framework; +namespace f = paddle::framework; + +using Tensor = framework::Tensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, @@ -30,10 +32,10 @@ void SegmentInputs(const std::vector& step_scopes, inlinks[i].external); Tensor* input = input_var->GetMutable(); - fmw::DDim dims = input->dims(); + f::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); - fmw::DDim step_dims = slice_ddim(dims, 1, dims.size()); + f::DDim step_dims = slice_ddim(dims, 1, dims.size()); for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); @@ -58,11 +60,10 @@ void ConcatOutputs(const std::vector& step_scopes, auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); - fmw::DDim step_dims = - step_scope_var->template GetMutable()->dims(); + f::DDim step_dims = step_scope_var->template GetMutable()->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(fmw::make_ddim(dims_vec)); + output->Resize(f::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { @@ -104,7 +105,7 @@ void LinkMemories(const std::vector& scopes, } void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op) { + const framework::OperatorBase& op) { arg->step_net = op.Input(name.step_net); arg->step_scopes = op.Output(name.step_scopes); diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 379754b98fcead6debe0a60efa62fce4b7761940..17941c503cfcc83415b8bc635623a2c2ce2981c3 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -17,12 +17,13 @@ #include #include "paddle/framework/operator.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { namespace rnn { +using Scope = framework::Scope; + /** * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). * @@ -86,7 +87,7 @@ void LinkMemories(const std::vector& step_scopes, const int offset, bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op); + const framework::OperatorBase& op); } // namespace rnn } // namespace operators diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index c6a1f082138f5a75e2d9cebbe7b547911e952e17..28b56a6934a9f03fc779671bf49fbc4ef3bcee90 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -13,12 +13,13 @@ limitations under the License. */ #include "paddle/operators/rowwise_add_op.h" + namespace paddle { namespace operators { -class RowWiseAddOp : public OperatorWithKernel { +class RowWiseAddOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto dim0 = ctx.Input("X")->dims(); auto dim1 = ctx.Input("b")->dims(); @@ -30,9 +31,10 @@ class RowWiseAddOp : public OperatorWithKernel { } }; -class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { +class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { public: - RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + RowWiseAddOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("b", "The right input of row-wise add op, must be vector"); @@ -48,6 +50,7 @@ for i in xrange(X.shape[0]): } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker); -REGISTER_OP_CPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +REGISTER_OP_CPU_KERNEL( + rowwise_add, ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index f76faa0a3a93a1ac277a1d1aa83c3fa6c3944648..86f80b81228a69ac4c05a4693901570f2b9966e0 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -15,5 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/rowwise_add_op.h" -REGISTER_OP_GPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + rowwise_add, ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 9e9f9d110c30452439cf4108d4a574b20a581c16..2a67407b52fe4aaf1d66571829c1e6ed7065c224 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -13,15 +13,24 @@ limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenMatrix = framework::EigenMatrix; + template -class RowWiseAddKernel : public OpKernel { +class RowWiseAddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto out = context.Output(0); out->mutable_data(context.GetPlace()); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 659cb41d989489d0e0f8d3cea3f5fb06094d23dc..30fe6fd491a8ee4a30f0f317a816ad7c712d96d9 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class SGDOp : public OperatorWithKernel { +class SGDOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE( ctx.Input("param")->dims() == ctx.Input("grad")->dims(), "Two input of SGD Op's dimension must be same."); @@ -27,9 +27,9 @@ class SGDOp : public OperatorWithKernel { } }; -class SGDOpMaker : public OpProtoAndCheckerMaker { +class SGDOpMaker : public framework::OpProtoAndCheckerMaker { public: - SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("param", "input parameter"); AddInput("grad", "input gradient"); @@ -47,5 +47,7 @@ param_out = param - learning_rate * grad; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker); -REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel); +REGISTER_OP_CPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 72629ccfbb8bc8ec53045289bd985c721c62fa10..f5ba6d3c29f8dfbfdea4fbf2c3d5fd7f5b358666 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" -REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index bf5b195933fce7faa46bcc96032e784076178cf7..bfb449d0b029409eda4177fc7643810ee6a1df3d 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SGDOpKernel : public OpKernel { +class SGDOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output(0); diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 27904ea0c3c21ec760dfed1b822bd45b586a933d..315887d8c4035054b993805c77f811d963d8508c 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -13,19 +13,21 @@ limitations under the License. */ #include "paddle/operators/sigmoid_op.h" + namespace paddle { namespace operators { -class SigmoidOp : public OperatorWithKernel { +class SigmoidOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; -class SigmoidOpMaker : public OpProtoAndCheckerMaker { +class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "sigmoid input"); AddOutput("Y", "sigmoid output"); @@ -33,9 +35,9 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker { } }; -class SigmoidOpGrad : public OperatorWithKernel { +class SigmoidOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); } }; @@ -43,9 +45,11 @@ class SigmoidOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker); REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad); -REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_CPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +REGISTER_OP_CPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_CPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index e80ba081f2ff805664cf92f3cb47e9ad51889058..1a50dfe14a7b9e2614aadb7729de9f9e461e9905 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -15,6 +15,9 @@ #define EIGEN_USE_GPU #include "paddle/operators/sigmoid_op.h" -REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_GPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_GPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index d513261e74423ce93a50eaaaec1c7d5fadb8f4a8..7af879b2091e4a7f80a3a64be029394156650c23 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -13,16 +13,21 @@ limitations under the License. */ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SigmoidKernel : public OpKernel { +class SigmoidKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); output->mutable_data(context.GetPlace()); @@ -37,9 +42,9 @@ class SigmoidKernel : public OpKernel { }; template -class SigmoidGradKernel : public OpKernel { +class SigmoidGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto Y_t = context.Input("Y"); auto dY_t = context.Input(framework::GradVarName("Y")); auto dX_t = context.Output(framework::GradVarName("X")); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 836bce2294f52af3062fe2bde82fb1df2207f54c..962787fffd8c5efaf42319b19dfcd071c48ba2bd 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -17,18 +17,19 @@ limitations under the License. */ namespace paddle { namespace operators { -class SoftmaxOp : public OperatorWithKernel { +class SoftmaxOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be matrix"); ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; -class SoftmaxOpMaker : public OpProtoAndCheckerMaker { +class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SoftmaxOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "input of softmax"); AddOutput("Y", "output of softmax"); @@ -36,12 +37,12 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { } }; -class SoftmaxOpGrad : public OperatorWithKernel { +class SoftmaxOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); - PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr, - "Input(Y@GRAD) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) should not be null"); PADDLE_ENFORCE(ctx.Input("Y")->dims() == ctx.Input(framework::GradVarName("Y"))->dims(), "the shape of Input(0) and Input(1) should be the same"); @@ -53,8 +54,11 @@ class SoftmaxOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; + REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_OP_CPU_KERNEL(softmax, + ops::SoftmaxKernel); REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); -REGISTER_OP_CPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +REGISTER_OP_CPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index b79228580a7ea0f70b62eb2dc7a61cf85bc0b5fb..2e99a89699dbdcafc8055c47debf9e49f10507e6 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,11 @@ limitations under the License. */ #define EIGEN_USE_GPU -#include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" -REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); -REGISTER_OP_GPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(softmax, + ops::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index b2dbcf57edf1a64da8da0d9a4c14d708eec17f3f..4fa6b59540498638c3b7df639ae10a66c0fa1c16 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -13,19 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - -#include "paddle/framework/ddim.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/tensor.h" -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class SoftmaxKernel : public OpKernel { +class SoftmaxKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input("X"); auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); @@ -62,9 +64,9 @@ class SoftmaxKernel : public OpKernel { }; template -class SoftmaxGradKernel : public OpKernel { +class SoftmaxGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { std::shared_ptr scale_ = std::make_shared(); auto Y = context.Input("Y"); diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h deleted file mode 100644 index eac12d35dd8d2977191218167ebb0a6e638d5d73..0000000000000000000000000000000000000000 --- a/paddle/operators/type_alias.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -using OpKernel = framework::OpKernel; -using OperatorBase = framework::OperatorBase; -using InferShapeContext = framework::InferShapeContext; -using ExecutionContext = framework::ExecutionContext; -using Variable = framework::Variable; -template -using EigenScalar = framework::EigenScalar; -template -using EigenVector = framework::EigenVector; -template -using EigenMatrix = framework::EigenMatrix; -template -using EigenTensor = framework::EigenTensor; -using Tensor = framework::Tensor; -using Scope = framework::Scope; -using OperatorWithKernel = framework::OperatorWithKernel; -using OperatorBase = framework::OperatorBase; -using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; -using OpProto = framework::OpProto; -using OpAttrChecker = framework::OpAttrChecker; -using CPUPlace = platform::CPUPlace; -using GPUPlace = platform::GPUPlace; -using OpRegistry = framework::OpRegistry; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..405b84b76d2e24db25d2ff16e99495f2f132ef09 --- /dev/null +++ b/paddle/operators/uniform_random_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class CPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::uniform_real_distribution dist( + static_cast(context.op_.GetAttr("min")), + static_cast(context.op_.GetAttr("max"))); + for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) { + data[i] = dist(engine); + } + } +}; + +class UniformRandomOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + "uniform_random's min must less then max"); + auto* tensor = ctx.Output(0); + auto dims = GetAttr>("dims"); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + UniformRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "The output tensor of uniform random op"); + AddComment(R"DOC(Uniform random operator. + +Used to initialize tensor with uniform random generator. +)DOC"); + AddAttr>("dims", "the dimension of random tensor"); + AddAttr("min", "Minimum value of uniform random").SetDefault(-1.0f); + AddAttr("max", "Maximun value of uniform random").SetDefault(1.0f); + AddAttr("seed", + "Random seed of uniform random. " + "0 means generate a seed by system") + .SetDefault(0); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(uniform_random, paddle::operators::UniformRandomOp, + paddle::operators::UniformRandomOpMaker); +REGISTER_OP_CPU_KERNEL(uniform_random, + paddle::operators::CPUUniformRandomKernel); diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..f1a63e52ec0d3d46a505a89d7d7916bf93a58221 --- /dev/null +++ b/paddle/operators/uniform_random_op.cu @@ -0,0 +1,70 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template +struct UniformGenerator { + T min_, max_; + unsigned int seed_; + + __host__ __device__ UniformGenerator(T min, T max, int seed) + : min_(min), max_(max), seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(min_, max_); + rng.discard(n); + return dist(rng); + } +}; + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class GPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + if (seed == 0) { + seed = std::random_device()(); + } + T min = static_cast(context.op_.GetAttr("min")); + T max = static_cast(context.op_.GetAttr("max")); + thrust::counting_iterator index_sequence_begin(0); + ssize_t N = framework::product(tensor->dims()); + thrust::transform(index_sequence_begin, index_sequence_begin + N, + thrust::device_ptr(data), + UniformGenerator(min, max, seed)); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_GPU_KERNEL(uniform_random, + paddle::operators::GPUUniformRandomKernel); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index ef72b973c1a465a8ac03cae1070429160eac0ac1..0547ac93cd183afbcede41d280c6b4b16ed7dab1 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input, } } +void Argument::reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo) { + int* seqStarts = seqStartPos->getMutableData(false); + int* subSeqStarts = subSeqStartPos->getMutableData(false); + + int seqNum = seqStartPos->getSize() - 1; + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } + } +} + } // namespace paddle diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 0ccdef802e71b659788cfd24f28ebe43e1917db1..d8d7a4398f99a2794c5d25528a7d582f5ed629ba 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -317,6 +317,30 @@ struct Argument { */ void printValueString(std::ostream& stream, const std::string& prefix = "") const; + + /** + * @brief reorganizeSeqInfo will reorganize sequenceStartPositions and + * subSequenceStartPositions into a 2 dimensional arrary: reorganizedSeqInfo. + * + * @param seqStartPos: sequenceStartPositions of an Argument. + * @param subSeqStartPos: subSequenceStartPositions of an Argument. + * @param the reorganized sequence start position information. + * + * Examples: + * seqStartPos: [0, 4, 15, 20, 28] + * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28] + * reorganizedSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + */ + static void reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo); }; } // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 60ce5822d3ade7ea6fa940db2f002a89263a34ca..46293e6c0bb318ec1dadc76a0213ddfdbb8bcd70 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -187,13 +187,9 @@ inline void throw_on_error(T e) { __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) - -// if two values have different data types, choose a compatible type for them. -template -struct CompatibleType { - static const bool t1_to_t2 = std::is_convertible::value; - typedef typename std::conditional::type type; -}; +#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ + PADDLE_ENFORCE(nullptr != (__VAL), #__VAL " should not be null\n%s", \ + paddle::string::Sprintf("" __VA_ARGS__)); template inline std::string enforce_to_string(const T& val) { @@ -211,17 +207,12 @@ inline std::string enforce_to_string(const char* const& val) { } #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ - PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \ - __CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \ + PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ #__VAL0, #__VAL1, \ paddle::platform::enforce_to_string(__VAL0), \ paddle::platform::enforce_to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); -#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \ - typename paddle::platform::CompatibleType::type(__VAL) - } // namespace platform } // namespace paddle diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 7117b49474044af08ae9db79c2fae6693e966af2..4dfb69754608cb1120baa295072c3d031a4e1a7b 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,8 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/platform/enforce.h" +#include + #include "gtest/gtest.h" +#include "paddle/platform/enforce.h" TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -196,3 +198,27 @@ TEST(ENFORCE_LT, FAIL) { ASSERT_TRUE(in_catch); } + +TEST(ENFORCE_NOT_NULL, OK) { + int* a = new int; + PADDLE_ENFORCE_NOT_NULL(a); + delete a; +} +TEST(ENFORCE_NOT_NULL, FAIL) { + bool in_catch = false; + int* a{nullptr}; + + try { + PADDLE_ENFORCE_NOT_NULL(a); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "a should not be null"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt deleted file mode 100644 index 8e6b258e00c0012876cda8ffc5b340322d51e894..0000000000000000000000000000000000000000 --- a/paddle/pybind/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - recurrent_op - fill_zeros_like_op) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index ede9e210245df740f13ebb32c98313554f522dd9..44442be4729ff77e8d378c93acebe1486eb75397 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -33,6 +33,9 @@ Configuring cmake in /paddle/build ... -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_SWIG_PY=ON + -DWITH_C_API=${WITH_C_API:-OFF} + -DWITH_PYTHON=${WITH_PYTHON:-ON} + -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} -DWITH_TESTING=${WITH_TESTING:-OFF} @@ -49,7 +52,9 @@ cmake .. \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ - -DWITH_SWIG_PY=ON \ + -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ + -DWITH_C_API=${WITH_C_API:-OFF} \ + -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ -DWITH_TESTING=${WITH_TESTING:-OFF} \ diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 33fb5d84e2701c163b5d1b1bb3362ee81ebb34ea..dfcff38302703066e868c60e213f0f7cbc55a31e 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -5,15 +5,9 @@ set -e mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build -# Compile paddle binaries first -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF - -mkdir output -make -j `nproc` -find .. -name '*whl' | xargs pip install # install all wheels. -rm -rf * # Compile Documentation only. cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON +make -j `nproc` gen_proto_py make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links @@ -35,6 +29,7 @@ TARGET_BRANCH="gh-pages" SOURCE_BRANCH="master" # Clone the repo to output directory +mkdir output git clone $REPO output cd output diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 18584cafe7971bad281b498908c54780250791b7..e1cea8bd0de5394020a498725485cea025512e48 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -17,7 +17,7 @@ foreach(filename ${proto_filenames}) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${external_project_dependencies}) + DEPENDS ${ABS_FIL} protoc) endforeach() add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e57636c22fb20d5d97de760b9cc3bcde..b7b696ef0c13e1bae2e910e08d1a1ea3e45cd5d5 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2657,6 +2657,31 @@ class SubSequenceLayer(LayerBase): self.create_bias_parameter(bias, size) +@config_layer('sub_nested_seq') +class SubNestedSequenceLayer(LayerBase): + def __init__(self, name, inputs, selected_indices, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sub_nested_seq ' + 'layer is a single nested sequence.') + inputs = inputs[0] + if isinstance(selected_indices, list): + assert len(selected_indices) == 1, ( + 'the second input of ' + 'sub_nested_seq layer is a single layer which is a ' + 'set of selected indices.') + selected_indices = selected_indices[0] + + super(SubNestedSequenceLayer, self).__init__( + name, + 'sub_nested_seq', + 0, + inputs=[inputs, selected_indices], + **xargs) + input_layer0 = self.get_input_layer(0) + size = input_layer0.size + self.set_layer_size(size) + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, inputs, device=None): @@ -3223,6 +3248,16 @@ class CTCLayer(LayerBase): config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs') +@config_layer('kmax_seq_score') +class KmaxSeqScoreLayer(LayerBase): + def __init__(self, name, inputs, beam_size, **xargs): + super(KmaxSeqScoreLayer, self).__init__( + name, 'kmax_seq_score', 0, inputs=inputs, **xargs) + config_assert( + len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.') + self.config.beam_size = beam_size + + @config_layer('warp_ctc') class WarpCTCLayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ea5fdcc50f6abbc67fb61b7fd56c100d9f9811d0..1bc55c869601551aff5fc0311458f906385522d2 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -129,8 +129,10 @@ __all__ = [ 'prelu_layer', 'gated_unit_layer', 'crop_layer', + 'sub_nested_seq_layer', 'clip_layer', 'slice_projection', + 'kmax_sequence_score_layer', ] @@ -224,8 +226,11 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' + SUB_NESTED_SEQ = 'sub_nested_seq' CLIP_LAYER = 'clip' + KMAX_SEQ_SCORE = 'kmax_seq_score' + @staticmethod def is_layer_type(type_name): """ @@ -6088,6 +6093,53 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): size=l.config.size) +@wrap_name_default() +@layer_support() +def sub_nested_seq_layer(input, selected_indices, name=None): + """ + The sub_nested_seq_layer accepts two inputs: the first one is a nested + sequence; the second one is a set of selceted indices in the nested sequence. + + Then sub_nest_seq_layer trims the first nested sequence input according + to the selected indices to form a new output. This layer is useful in + beam training. + + The example usage is: + + .. code-block:: python + + sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) + + + :param input: A nested sequence. + :type input: LayerOutput + :param selected_indices: a set of sequence indices in the nested sequence. + :type input: LayerOutput + :param name: name of this layer. + :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + assert isinstance(selected_indices, LayerOutput), ( + 'The second input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + + l = Layer( + inputs=input.name, + selected_indices=selected_indices.name, + name=name, + type=LayerType.SUB_NESTED_SEQ) + return LayerOutput( + name=name, + layer_type=LayerType.SUB_NESTED_SEQ, + parents=input, + size=l.config.size) + + @wrap_name_default("clip") def clip_layer(input, min, max, name=None): """ @@ -6109,7 +6161,8 @@ def clip_layer(input, min, max, name=None): :type min: double :param max: The upper threshold for clipping. :type max: double - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ Layer( name=name, @@ -6119,3 +6172,41 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +@layer_support() +def kmax_sequence_score_layer(input, name=None, beam_size=1): + """ + This layer accepts one input which are scores over a sequence or a nested + sequence, and returns indices of beam_size sequences with highest scores. + + .. code-block:: python + + kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size) + + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. It stores scores over a sequence or a nested + sequence and its size must be 1. + :type input: LayerOutput. + :param beam_size: squence indices with top beam_size scores are returned. + :type beam_size: double + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer " + "accepts only one input.") + assert input.size == 1, ( + "input of kmax_sequence_score_layer is a score" + "over a sequence or a nested sequence, so its width must be 1.") + + Layer( + name=name, + type=LayerType.KMAX_SEQ_SCORE, + inputs=[input.name], + beam_size=beam_size) + + return LayerOutput( + name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 0ffa58bc1e2088f75e7cd25c7ecdffbe270825a4..a61beb871ad064c617fa141451afcb2a5ac64854 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) +test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer +test_kmax_seq_socre_layer test_seq_select_layers) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..81bd71f68eb3f2c04ccd46ee3b77a07543395c60 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -0,0 +1,66 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "data" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "exponential" + inputs { + input_layer_name: "data" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + beam_size: 5 +} +parameters { + name: "___fc_layer_0__.w0" + size: 128 + initial_mean: 0.0 + initial_std: 0.0883883476483 + dims: 128 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__kmax_sequence_score_layer_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "data" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_0__" + input_layer_names: "data" + output_layer_names: "__kmax_sequence_score_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..4b906b113e3c0569d5576127e100d097e4923436 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr @@ -0,0 +1,37 @@ +type: "nn" +layers { + name: "input_seq" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "input" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 300 + active_type: "" + inputs { + input_layer_name: "input_seq" + } + inputs { + input_layer_name: "input" + } +} +input_layer_names: "input_seq" +output_layer_names: "__sub_nested_seq_layer_0__" +sub_models { + name: "root" + layer_names: "input_seq" + layer_names: "input" + layer_names: "__sub_nested_seq_layer_0__" + input_layer_names: "input_seq" + output_layer_names: "__sub_nested_seq_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d245c5a41c793e1f02f306bfe64071bd9885906e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) + +data = data_layer(name="data", size=128) +scores = fc_layer(input=data, size=1, act=ExpActivation()) +kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) + +outputs(kmax_seq_id) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..6d1c3175ba9801d69f3f9cb9e754858253192270 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +beam_size = 5 + +data = data_layer(name='input_seq', size=300) +selected_ids = data_layer(name='input', size=beam_size) +sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) + +outputs(sub_nest_seq) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 541639ac21661529b0b1f2cc8d8fa25605052c8c..10659caa882fd3d4060f9947413a392c3b681ee8 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py) py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) +py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) @@ -21,3 +22,4 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_operator SRCS test_operator.py) +py_test(test_uniform_random_op SRCS test_uniform_random_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index cfd29932f5b46920815819c5a75d62a0138e21a2..b73c4869d14a62a951d8e45dafb14b7523355519 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,16 +1,31 @@ +import unittest + +import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -import numpy -import unittest __all__ = ['get_numeric_gradient'] +def create_op(op_type): + kwargs = dict() + for in_name in Operator.get_op_input_names(op_type): + kwargs[in_name] = in_name + for out_name in Operator.get_op_output_names(op_type): + kwargs[out_name] = out_name + + return Operator(op_type, **kwargs) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + def get_numeric_gradient(op, input_values, output_name, input_to_check, - delta=1e-2, + delta=0.005, local_scope=None): """ Get Numeric Gradient for an operator's input. @@ -76,6 +91,113 @@ def get_numeric_gradient(op, return gradient_flat.reshape(tensor_to_check.get_dims()) +class GradientChecker(unittest.TestCase): + def __is_close(self, numeric_grads, scope, max_relative_error): + for name in numeric_grads: + op_grad = numpy.array( + scope.find_var(grad_var_name(name)).get_tensor()) + is_close = numpy.allclose( + numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) + if not is_close: + return False + return True + + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: output name that used to + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ + if no_grad_set is None: + no_grad_set = set() + + tmp_outs = forward_op.temp_outputs() + no_tmp_out = filter(lambda name: name not in tmp_outs, + forward_op.outputs()) + if len(no_tmp_out) != 1: + raise ValueError("non temp out_names should be 1") + + in_names = forward_op.inputs() + for no_grad in no_grad_set: + if no_grad not in in_names: + raise ValueError("no_grad should be in in_names") + + backward_op = core.Operator.backward(forward_op, no_grad_set) + + places = [core.CPUPlace()] + if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): + places.append(core.GPUPlace(0)) + + numeric_grad = dict() + # get numeric gradient + for check_name in inputs_to_check: + numeric_grad[check_name] = \ + get_numeric_gradient(forward_op, input_vars, output_name, check_name) + + # get operator gradient according to different device + for place in places: + scope = core.Scope() + ctx = core.DeviceContext.create(place) + + # create input var and set value + for name, value in input_vars.iteritems(): + if name not in in_names: + raise ValueError(name + " not in op.inputs_") + var = scope.new_var(name).get_tensor() + var.set_dims(value.shape) + var.set(value, place) + + # create output var + for out_name in forward_op.outputs(): + scope.new_var(out_name).get_tensor() + + # infer the shape of output var and compute/set value of output var + forward_op.infer_shape(scope) + forward_op.run(scope, ctx) + + # create output grad var + # set shape as the output var + # set value of this grad to ones + for name in forward_op.outputs(): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = 1.0 * numpy.ones(out_tensor.shape()) + grad_tensor.set(data, place) + + # create input grad var + for name in backward_op.outputs(): + scope.new_var(name).get_tensor() + + # infer the shape of input gradient var and compute/set it's value + # with backward op + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + if isinstance(place, core.CPUPlace): + msg = "CPU kernel gradient is not close to numeric gradient" + else: + if isinstance(place, core.GPUPlace): + msg = "GPU kernel gradient is not close to numeric gradient" + else: + raise ValueError("unknown place " + type(place)) + self.assertTrue( + self.__is_close(numeric_grad, scope, max_relative_error), msg) + + if __name__ == '__main__': class GetNumericGradientTest(unittest.TestCase): @@ -87,4 +209,28 @@ if __name__ == '__main__': arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) + def test_softmax_op(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - numpy.max(x) + exps = numpy.exp(shiftx) + return exps / numpy.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = numpy.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + softmax_op = Operator("softmax", X="X", Y="Y") + + X = numpy.random.random((2, 2)).astype("float32") + Y = numpy.apply_along_axis(stable_softmax, 1, X) + dY = numpy.ones(Y.shape) + dX = label_softmax_grad(Y, dY) + + arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') + numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + unittest.main() diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index da6bed0fcd690d5a7f53f44d0181c75f12e5d074..dd65e0f2dc23d3f657ff16c55fb297dae210b2d7 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -1,6 +1,5 @@ -import paddle.v2.framework.core as core -import unittest import numpy +import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator @@ -24,7 +23,7 @@ class OpTestMeta(type): scope = core.Scope() kwargs = dict() places = [core.CPUPlace()] - if core.is_compile_gpu() and core.Operator.support_gpu(self.type): + if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: @@ -53,6 +52,8 @@ class OpTestMeta(type): kwargs[attr_name] = self.attrs[attr_name] op = Operator(self.type, **kwargs) + if isinstance(place, core.GPUPlace) and not op.support_gpu(): + return op.infer_shape(scope) diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index b26e25d58b59bd1cb16e9ba2a1cccd27799b15f2..4815192e255c6e0429db3f50918a76a773b30131 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,9 +1,10 @@ import unittest import numpy from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op -class TestSGD(unittest.TestCase): +class TestCrossEntropy(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): @@ -20,7 +21,18 @@ class TestSGD(unittest.TestCase): self.outputs = {'Y': numpy.array(Y).astype("float32")} -# TODO(superjom) add gradient check +class CrossEntropyGradOpTest(GradientChecker): + def test_softmax_grad(self): + op = create_op("onehot_cross_entropy") + batch_size = 100 + class_num = 10 + inputs = { + "X": numpy.random.uniform( + 0.1, 1.0, [batch_size, class_num]).astype("float32"), + "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") + } + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index d20e085b8e43488480edf07b6cd4edcd861883f3..e670d93653e07d35e5019c9daac45c214eddf367 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,9 +1,8 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +from gradient_checker import GradientChecker, create_op from op_test_util import OpTestMeta @@ -25,62 +24,11 @@ class TestSoftmaxOp(unittest.TestCase): } -class TestSoftmaxGradOp(unittest.TestCase): - def test_softmax_grad(self): - op = Operator('softmax', X="X", Y="Y") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "softmax_grad") - expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - batch_size = 3 - class_num = 5 - # Initialize X and add 1e-2 for numerical stability - Y = np.random.rand(batch_size, class_num).astype(np.float32) - Y = Y + 1e-2 - dY = np.random.rand(batch_size, class_num).astype(np.float32) - - # Reference implementation of cross entropy with soft labels - def label_softmax_grad(Y, dY): - dX = Y * 0.0 - for i in range(batch_size): - d = np.dot(Y[i, :], dY[i, :]) - dX[i, :] = Y[i, :] * (dY[i, :] - d) - return dX - - expected = label_softmax_grad(Y, dY) - - scope = core.Scope() - places = [] - places.append(core.CPUPlace()) - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - y = scope.new_var("Y") - y_tensor = y.get_tensor() - y_tensor.set_dims([batch_size, class_num]) - y_tensor.alloc_float(place) - y_tensor.set(Y, place) - - dy = scope.new_var("Y@GRAD") - dy_tensor = dy.get_tensor() - dy_tensor.set_dims([batch_size, class_num]) - dy_tensor.alloc_float(place) - dy_tensor.set(dY, place) - - x = scope.new_var("X") - dx = scope.new_var("X@GRAD") - - tensor = scope.find_var("X@GRAD").get_tensor() - backward_op.infer_shape(scope) - self.assertEqual([batch_size, class_num], tensor.shape()) - - ctx = core.DeviceContext.create(place) - backward_op.run(scope, ctx) - actual = np.array(tensor) - - np.testing.assert_almost_equal(actual, expected, decimal=3) +class SoftmaxGradOpTest(GradientChecker): + def test_softmax(self): + op = create_op("softmax") + inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")} + self.check_grad(op, inputs, set("X"), "Y") if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py new file mode 100644 index 0000000000000000000000000000000000000000..c3d2bb44da3977c0899b2609a8efe15b7e1789f2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -0,0 +1,35 @@ +import unittest +from paddle.v2.framework.op import Operator +import paddle.v2.framework.core as core +import numpy + + +class UniformRandomTest(unittest.TestCase): + def test_uniform_random_cpu(self): + self.uniform_random_test(place=core.CPUPlace()) + + def test_uniform_random_gpu(self): + if core.is_compile_gpu(): + self.uniform_random_test(place=core.GPUPlace(0)) + + def uniform_random_test(self, place): + scope = core.Scope() + scope.new_var("X").get_tensor() + + op = Operator( + "uniform_random", + Out="X", + dims=[1000, 784], + min=-5.0, + max=10.0, + seed=10) + + op.infer_shape(scope) + ctx = core.DeviceContext.create(place) + op.run(scope, ctx) + tensor = numpy.array(scope.find_var("X").get_tensor()) + self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) + + +if __name__ == '__main__': + unittest.main()