diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 5d24caebdcc5a28823164d718fb1628be5c4179d..b67c559fdf7a2b1695589a041cfa8fb8a9580516 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -63,9 +63,17 @@ ExternalProject_Add( -DMKLROOT:PATH=${MKLML_ROOT} ) -ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) -ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) +ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT}) MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_MKLDNN) -LIST(APPEND external_project_dependencies mkldnn) +LIST(APPEND external_project_dependencies shared_mkldnn) + +# generate a static dummy target to track mkldnn dependencies +# for cc_library(xxx SRCS xxx.c DEPS mkldnn) +SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c) +FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +ADD_LIBRARY(mkldnn STATIC ${dummyfile}) +TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) diff --git a/doc/design/ci_build_whl.png b/doc/design/ci_build_whl.png new file mode 100644 index 0000000000000000000000000000000000000000..232762b82a9ae3e979a1f38a7beb715c87438f40 Binary files /dev/null and b/doc/design/ci_build_whl.png differ diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md index 14c081ea84282e52a2e36475c3c0ea755122d154..b9787261092f1f27377886152cb1596d9ff54188 100644 --- a/doc/design/releasing_process.md +++ b/doc/design/releasing_process.md @@ -7,11 +7,9 @@ PaddlePaddle每次发新的版本,遵循以下流程: 1. 从`develop`分支派生出新的分支,分支名为`release/版本号`。例如,`release/0.10.0` 1. 将新分支的版本打上tag,tag为`版本号rc.Patch号`。第一个tag为`0.10.0rc1`,第二个为`0.10.0rc2`,依次类推。 1. 对这个版本的提交,做如下几个操作: + * 使用Regression Test List作为检查列表,测试本次release的正确性。 + * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,到第二步 * 修改`python/setup.py.in`中的版本信息,并将`istaged`字段设为`True`。 - * 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步 - * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 - * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 - * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 * 编译这个版本的python wheel包,并发布到pypi。 * 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。 * pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。 @@ -21,8 +19,8 @@ PaddlePaddle每次发新的版本,遵循以下流程: pip install twine twine upload dist/[package to upload] ``` + * 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步 1. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 -1. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 1. 协同完成Release Note的书写 @@ -31,6 +29,30 @@ PaddlePaddle每次发新的版本,遵循以下流程: * `release/版本号`分支一旦建立,一般不允许再从`develop`分支合入`release/版本号`。这样保证`release/版本号`分支功能的封闭,方便测试人员测试PaddlePaddle的行为。 * 在`release/版本号`分支存在的时候,如果有bugfix的行为,需要将bugfix的分支同时merge到`master`, `develop`和`release/版本号`这三个分支。 +## 发布wheel包到pypi + +使用[PaddlePaddle CI](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview) +完成自动化二进制编译,参考下图,选择需要发布的版本(通常包含一个CPU版本和一个GPU版本),点击"run"右侧的"..."按钮,可以 +弹出下面的选择框,在第二个tab (Changes)里选择需要发布的分支,这里选择0.11.0,然后点击"Run Build"按钮。等待编译完成后 +可以在此页面的"Artifacts"下拉框中找到生成的3个二进制文件,分别对应CAPI,`cp27m`和`cp27mu`的版本。然后按照上述的方法 +使用`twine`工具上传即可。 + + + +* 注:CI环境使用 https://github.com/PaddlePaddle/buildtools 这里的DockerImage作为编译环境以支持更多的Linux + 发型版,如果需要手动编译,也可以使用这些镜像。这些镜像也可以从 https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ 下载得到。 +* pypi不支持覆盖上传,所以一个版本号的wheel包发布之后,不可以更改。下一个wheel包需要更新版本号才可以上传。 + +## 发布Docker镜像 + +上述PaddlePaddle CI编译wheel完成后会自动将Docker镜像push到DockerHub,所以,发布Docker镜像只需要对自动push的镜像打上 +版本号对应的tag即可: + +1. 进入 https://hub.docker.com/r/paddlepaddle/paddle/tags/ 查看latest tag的更新时间是否在上述编译wheel包完成后是否最新。 +1. 执行 `docker pull paddlepaddle/paddle:[latest tag]`,latest tag可以是latest或latest-gpu等。 +1. 执行 `docker tag paddlepaddle/paddle:[latest tag] paddlepaddle/paddle:[version]` +1. 执行 `docker push paddlepaddle/paddle:[version]` + ## PaddlePaddle 分支规范 PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范,并适应github的特性做了一些区别。 diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 528e45b51099d97a1f6f0dfc971b6231f928af94..3967a40136d6493d533ef9aadd2054cc23592879 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -41,7 +41,7 @@ device_context) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) -cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute) +cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference data_transform) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index bf1f0471ccbfccf13cb6f74c8088da7acd68ec0b..844d98916ea5b1ffd88615825d79af37ba7d128e 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -111,7 +111,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); - VLOG(3) << op->DebugString(); + VLOG(3) << op->DebugStringEx(local_scope); op->Run(*local_scope, place_); if (FLAGS_check_nan_inf) { for (auto& vname : op->OutputVars(true)) { diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index b9dcf16da5d5e1d10176b6c9ae6ea1be080064ae..4ef0c2523ca7d4548b9f509aa943449ca88dead1 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -73,6 +73,17 @@ void UseALL() { UseCUDNN(); } +static DDim GetDims(const Scope& scope, const std::string& name) { + Variable* var = scope.FindVar(name); + if (var->IsType()) { + return var->Get().dims(); + } else if (var->IsType()) { + return var->Get().GetCompleteDims(); + } else { + return DDim({-1}); + } +} + std::string OperatorBase::Input(const std::string& name) const { auto& ins = Inputs(name); PADDLE_ENFORCE_LE(ins.size(), 1UL, @@ -105,7 +116,7 @@ const std::vector& OperatorBase::Outputs( return it->second; } -std::string OperatorBase::DebugString() const { +std::string OperatorBase::DebugStringEx(const Scope* scope) const { std::stringstream ss; ss << "Op(" << type_ << "), inputs:{"; for (auto it = inputs_.begin(); it != inputs_.end();) { @@ -113,6 +124,9 @@ std::string OperatorBase::DebugString() const { ss << input.first << "["; for (size_t i = 0; i < input.second.size(); ++i) { ss << input.second[i]; + if (scope) { + ss << "(" << GetDims(*scope, input.second[i]) << ")"; + } if (i != input.second.size() - 1) { ss << ", "; } @@ -129,6 +143,9 @@ std::string OperatorBase::DebugString() const { ss << output.first << "["; for (size_t i = 0; i < output.second.size(); ++i) { ss << output.second[i]; + if (scope) { + ss << "(" << GetDims(*scope, output.second[i]) << ")"; + } if (i != output.second.size() - 1) { ss << ", "; } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 1f5a4af58c5a9ad2fa8f4ac08ece67084b8f741a..800397c077b52d02de115b40e3fe719f3f861389 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -108,7 +108,10 @@ class OperatorBase { return boost::get(attrs_.at(name)); } - virtual std::string DebugString() const; + /// if scope is not null, also show dimensions of arguments + virtual std::string DebugStringEx(const Scope* scope) const; + + std::string DebugString() const { return DebugStringEx(nullptr); } /// Net will call this function to Run an op. virtual void Run(const Scope& scope, const platform::Place& place) const = 0; diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc index 48a51efcd25afeb499ab9389c2f27c8bd2515cd8..49e39358e81bbee64a618be88ee0fca6aa438b93 100644 --- a/paddle/inference/inference.cc +++ b/paddle/inference/inference.cc @@ -38,23 +38,16 @@ void InferenceEngine::LoadInferenceModel( LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); // PicklingTools cannot parse the vector of strings correctly. #else - // program_desc_str - // the inference.model is stored by following python codes: - // inference_program = fluid.io.get_inference_program(predict) - // model_filename = "recognize_digits_mlp.inference.model/inference.model" - // with open(model_filename, "w") as f: - // program_str = inference_program.desc.serialize_to_string() - // f.write(struct.pack('q', len(program_str))) - // f.write(program_str) - std::string model_filename = dirname + "/inference.model"; + std::string model_filename = dirname + "/__model__.dat"; LOG(INFO) << "loading model from " << model_filename; - std::ifstream fs(model_filename, std::ios_base::binary); - int64_t size = 0; - fs.read(reinterpret_cast(&size), sizeof(int64_t)); - LOG(INFO) << "program_desc_str's size: " << size; + std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); std::string program_desc_str; - program_desc_str.resize(size); - fs.read(&program_desc_str[0], size); + inputfs.seekg(0, std::ios::end); + program_desc_str.resize(inputfs.tellg()); + inputfs.seekg(0, std::ios::beg); + LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); + inputfs.read(&program_desc_str[0], program_desc_str.size()); + inputfs.close(); #endif program_ = new framework::ProgramDesc(program_desc_str); GenerateLoadProgram(dirname); diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index 78b5e2767842312722fac3509e843a05fe194559..03302f5cbf5674dca1d22a84137579090b4d5eac 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -56,11 +56,11 @@ void NetOp::CompleteAddOp(bool calc) { std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs)); } -std::string NetOp::DebugString() const { +std::string NetOp::DebugStringEx(const framework::Scope* scope) const { std::ostringstream os; - os << OperatorBase::DebugString() << std::endl; + os << OperatorBase::DebugStringEx(scope) << std::endl; for (auto& op : ops_) { - std::istringstream is(op->DebugString()); + std::istringstream is(op->DebugStringEx(scope)); for (std::string line; std::getline(is, line);) { os << " " << line << std::endl; } diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 85d0153b32c0ba53bfe0912fc2682c8b635ba172..b24042f5ef5822eabcada8ed9d21c552579e8064 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -106,7 +106,8 @@ class NetOp : public framework::OperatorBase { void CompleteAddOp(bool calculate = true); - std::string DebugString() const override; + std::string DebugStringEx( + const framework::Scope* scope = nullptr) const override; bool IsNetOp() const override; std::vector OutputVars(bool has_intermediate) const override; diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save deleted file mode 100644 index c24308a7d0131b84c28c0a9857cce4949afb2091..0000000000000000000000000000000000000000 Binary files a/paddle/operators/tensor.save and /dev/null differ diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 8c4803b9739bb54cae89de62468a47631a5dde94..44f6d85cd1510f309595ca711de2e0f767219580 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -21,10 +21,16 @@ ELSE() set(GPU_CTX_DEPS) ENDIF() +IF(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +ELSE() + set(MKLDNN_CTX_DEPS) +ENDIF() + # memcpy deoends on device_context, here add deps individually for # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator - system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) + system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 4bf643e048dcf97f7888dcc78eb1b4fa4feee046..9d9348079a0179418ab1a1474cfd8b69136f26b2 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -168,5 +168,69 @@ cudaStream_t CUDADeviceContext::stream() const { return stream_; } #endif +#ifdef PADDLE_WITH_MKLDNN +MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) + : CPUDeviceContext(place), ready_(false) { + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); +} + +template +void MKLDNNDeviceContext::AddElement(const std::string& op_key, + const T& value) { + if (GetElement(op_key)) { + return; + } + GetElementPool().emplace(op_key, std::move(value)); +} + +template +const T& MKLDNNDeviceContext::GetElement(const std::string& op_key) const { + auto it = GetElementPool().find(op_key); + return it == GetElementPool().end() ? nullptr : it->second; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return memory_pool_; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_pool_; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_desc_pool_; +} + +void MKLDNNDeviceContext::Execute(bool block) { + if (pipeline_.empty()) { + return; + } + ResetStream(); + stream_->submit(pipeline_).wait(block); + ready_ = false; + pipeline_.clear(); +} + +void MKLDNNDeviceContext::ResetStream() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have specific method to reset this state + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; +} + +#endif + } // namespace platform } // namespace paddle diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 609ea4bd3ad50e2eefa908539339903ed1f0a807..7a0040c9c229af79ea8be1049dfd6c0d1b4d19cf 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -21,6 +21,10 @@ limitations under the License. */ #define EIGEN_USE_GPU #endif +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/platform/mkldnn_helper.h" +#endif + #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -105,6 +109,54 @@ struct DefaultDeviceContextType { #endif +#ifdef PADDLE_WITH_MKLDNN +class MKLDNNDeviceContext : public CPUDeviceContext { + public: + explicit MKLDNNDeviceContext(CPUPlace place); + + /* \brief Add new element: memory, primitive or primitive desc */ + template + void AddElement(const std::string& op_key, const T& value); + + /* \brief Get existed element: memory, primitive or primitive desc */ + template + const T& GetElement(const std::string& op_key) const; + + /* \brief Get element pool: memory, primitive or primitive desc pool */ + template + const std::unordered_map>& + GetElementPool() const; + + /* \brief Get the active engine */ + const MKLDNNEngine& engine() const { return *engine_; } + + /* \brief Submit primitive to pipeline */ + void Submit(const MKLDNNPrimitivePtr& p) { pipeline_.push_back(*p); } + + /*! \brief Execute all submitted primitives in pipeline */ + void Execute(bool block = true); + + protected: + /*! \brief Reset the stream to prepare next exectue */ + void ResetStream(); + + private: + std::unordered_map> + memory_pool_; + std::unordered_map> + primitive_pool_; + std::unordered_map> + primitive_desc_pool_; + std::vector pipeline_; + MKLDNNStreamPtr stream_; + MKLDNNEnginePtr engine_; + bool ready_; +}; +#endif + /*! \brief device context pool singleton */ class DeviceContextPool { public: diff --git a/paddle/platform/mkldnn_helper.h b/paddle/platform/mkldnn_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..cd52a8b4c434071a030a8e7a8a70fc3adba8460c --- /dev/null +++ b/paddle/platform/mkldnn_helper.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +using MKLDNNStream = mkldnn::stream; +using MKLDNNEngine = mkldnn::engine; +using MKLDNNMemory = mkldnn::memory; +using MKLDNNPrimitive = mkldnn::primitive; +using MKLDNNPrimitiveDesc = mkldnn::handle; + +typedef std::unique_ptr MKLDNNStreamPtr; +typedef std::unique_ptr MKLDNNEnginePtr; +typedef std::unique_ptr MKLDNNMemoryPtr; +typedef std::unique_ptr MKLDNNPrimitivePtr; +typedef std::unique_ptr MKLDNNPrimitiveDescPtr; + +} // namespace platform +} // namespace paddle diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 92039ec6b05d224e702f0ba5dc05c057a492287e..f1e244772f3f9e636dc749a21dc965743aa642da 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -193,6 +193,25 @@ EOF EOF } +function gen_capi_package() { + if [[ ${WITH_C_API} == "ON" ]]; then + install_prefix="/paddle/build/capi_output" + rm -rf $install_prefix + + make DESTDIR="$install_prefix" install + + if [[ ${WITH_MKL:-OFF} == "ON" ]]; then + find ./third_party/install -name 'libmklml_gnu.so' -exec cp {} $install_prefix/usr/local/lib \; + find ./third_party/install -name 'libmklml_intel.so' -exec cp {} $install_prefix/usr/local/lib \; + cp -P ./third_party/install/mkldnn/lib/* $install_prefix/usr/local/lib/ + fi + + find ./third_party/install -name 'libiomp5.so' -exec cp {} $install_prefix/usr/local/lib \; + cd $install_prefix/usr/local + ls | egrep -v "^Found.*item$" | xargs tar /paddle/build/paddle.tgz + fi +} + set -xe cmake_gen ${PYTHON_ABI:-""} @@ -200,6 +219,11 @@ run_build run_test gen_docs gen_dockerfile - -printf "If you need to install PaddlePaddle in develop docker image," -printf "please make install or pip install build/python/dist/*.whl.\n" +gen_capi_package + +if [[ ${WITH_C_API:-OFF} == "ON" ]]; then + printf "PaddlePaddle C-API libraries was generated on build/paddle.tgz\n" +else + printf "If you need to install PaddlePaddle in develop docker image," + printf "please make install or pip install build/python/dist/*.whl.\n" +fi diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index ecba87191045cff6c05014010e60575741238f8d..e6f87ce61b1d16d4f98f111626776aa52c2ec35b 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -58,12 +58,12 @@ def is_compatible_with(x, Type): class HookAttribute(object): """ - Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs + Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs during training process of a layer with parameters, such as img_conv layer, fc layer. - :param type: Hook type, currently supported types: + :param type: Hook type, currently supported types: 'pruning' : user specify a sparsity_ratio before training started, and the - network will prune the parameters based on the sparsity_ratio. + network will prune the parameters based on the sparsity_ratio. eg: The definition of Hook object can be hk = HookAttribute('pruning', 0.6) The specific usage can be paddle.layer.img_conv(input=img, filter_size=3, num_channels=3, num_filters=64, @@ -71,10 +71,10 @@ class HookAttribute(object): The pruning details can be found https://arxiv.org/pdf/1506.02626.pdf :type type: string - :param sparsity_ratio: Must be specified if hook type is 'pruning', + :param sparsity_ratio: Must be specified if hook type is 'pruning', it represents the ratio of the zero elements to be set by the Parameter. :type sparsity_ratio: float or None - + """ def __init__(self, type, sparsity_ratio=None): @@ -130,10 +130,12 @@ class ParameterAttribute(object): :param sparse_update: Enable sparse update for this parameter. It will enable both local and remote sparse update. :type sparse_update: bool + :param update_hooks: A HookAttribute object. + :type update_hooks: HookAttribute :param initializer: If not None, it should be a callable object which accepts a parameter name and returns numpy array for the initial value of the parameter - :param initializer: callable object + :type initializer: callable object """ def __init__(self, diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py index 88fe19da5e2c2df7f7eed7b26261ec155f0013f7..66a7f737574c438a2b945bd4a49d8317bd460c80 100644 --- a/python/paddle/v2/fluid/backward.py +++ b/python/paddle/v2/fluid/backward.py @@ -7,7 +7,7 @@ __all__ = ['append_backward'] def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): """ - Traverse all ops in op_descs[begin_idx : end_idx], + Traverse all ops in op_descs[begin_idx : end_idx], if any op has inputs/outputs named "old_name", rename it as 'new_name' """ if begin_idx is None: @@ -162,7 +162,7 @@ def _remove_no_grad_branch_(op_descs, no_grad_set): if core.grad_var_suffix() in arg and arg in no_grad_set: to_insert.append((_create_op_desc_("fill_zeros_like", { "X": [_strip_grad_suffix_(arg)] - }, {"Y": [arg]}, {}), idx)) + }, {"Out": [arg]}, {}), idx)) map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert)) @@ -182,7 +182,7 @@ def _append_backward_ops_(target, target(Variable): the target variable of forward pass block(Block): the block where forward ops are target_block(Block): the block which is going to hold new generated grad ops - no_grad_dict(dict): + no_grad_dict(dict): key(int) block index val(set) a set of varibale names. These varibales have no gradient grad_to_var(dict)(output argument): @@ -276,8 +276,8 @@ def append_backward(loss, parameter_list=None, no_grad_set=None): loss(Variable): The variable generated by cost function. parameter_list(list): Parameters that need to be updated by optimizer. If None, it means all parameters need to be updated. - no_grad_set(set): Variables that have no gradients in Block 0. - If None, the set will be generated inside the function and + no_grad_set(set): Variables that have no gradients in Block 0. + If None, the set will be generated inside the function and contains all variables with `step_gradient=True` from all blocks. Return: diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 926327b70c70d250766c2640d808bb3e3516d37b..c63567601accd8c072368351f2838857bb61c818 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -212,6 +212,11 @@ def save_inference_model(dirname, "fetch_var_names": fetch_var_names }, f, -1) + # Save only programDesc of inference_program in binary format + # in another file: __model__.dat + with open(model_file_name + ".dat", "wb") as fp: + fp.write(inference_program.desc.serialize_to_string()) + save_params(executor, dirname, main_program) diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index 787a3153f401dc2bf9a2350fdde24a7c5aa94e89..948a67524490ec7dcf84ef9af48be54f0fc0f908 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -998,7 +998,7 @@ class ConditionalBlock(object): out_list = [ parent_block.var(var_name) for var_name in parent_block.vars - if var_name not in intermediate + if var_name in intermediate ] step_scope = parent_block.create_var( diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 6883630ac6a28176258272996ed7d0e73652a40b..7feb479d2e3bc396eac53045888175ba54864b66 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -64,14 +64,14 @@ def fc(input, is flattened: the first `num_flatten_dims` dimensions will be flatten to form the first dimension of the final matrix (height of the - matrix), and the rest `rank(X) - num_col_dims` + matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to form the second dimension of the final matrix (width of the matrix). For example, suppose `X` is a 6-dimensional tensor with a shape [2, 3, 4, 5, 6], and - `x_num_col_dims` = 3. Then, the flattened matrix + `num_flatten_dims` = 3. Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. - By default, `x_num_col_dims` is set to 1. + By default, `num_flatten_dims` is set to 1. param_attr(ParamAttr|list): The parameter attribute for learnable parameters/weights of the fully connected layer. @@ -243,18 +243,21 @@ def gru_unit(input, r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r) - ch_t & = actNode(xc_t + W_c dot(r_t, h_{t-1}) + b_c) + m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m) - h_t & = dot((1-u_t), ch_{t-1}) + dot(u_t, h_t) + h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1}) The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms of the equation above, the :math:`z_t` is split into 3 parts - - :math:`xu_t`, :math:`xr_t` and :math:`xc_t`. This means that in order to + :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to implement a full GRU unit operator for an input, a fully connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`. - This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t - 1})` - and concatenation of :math:`u_t`, :math:`r_t` and :math:`ch_t`. + The terms :math:`u_t` and :math:`r_t` represent the update and reset gates + of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is + an intermediate candidate hidden output, which is denoted by :math:`m_t`. + This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})` + and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`. Args: input (Variable): The fc transformed input value of current step.