diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 5d24caebdcc5a28823164d718fb1628be5c4179d..b67c559fdf7a2b1695589a041cfa8fb8a9580516 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -63,9 +63,17 @@ ExternalProject_Add(
-DMKLROOT:PATH=${MKLML_ROOT}
)
-ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
-ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
+ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
+ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
add_definitions(-DPADDLE_WITH_MKLDNN)
-LIST(APPEND external_project_dependencies mkldnn)
+LIST(APPEND external_project_dependencies shared_mkldnn)
+
+# generate a static dummy target to track mkldnn dependencies
+# for cc_library(xxx SRCS xxx.c DEPS mkldnn)
+SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c)
+FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
+ADD_LIBRARY(mkldnn STATIC ${dummyfile})
+TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB})
+ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
diff --git a/doc/design/ci_build_whl.png b/doc/design/ci_build_whl.png
new file mode 100644
index 0000000000000000000000000000000000000000..232762b82a9ae3e979a1f38a7beb715c87438f40
Binary files /dev/null and b/doc/design/ci_build_whl.png differ
diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md
index 14c081ea84282e52a2e36475c3c0ea755122d154..b9787261092f1f27377886152cb1596d9ff54188 100644
--- a/doc/design/releasing_process.md
+++ b/doc/design/releasing_process.md
@@ -7,11 +7,9 @@ Each new PaddlePaddle release follows this process:
1. Fork a new branch from the `develop` branch, named `release/<version>`, e.g. `release/0.10.0`.
1. Tag the new branch with a tag of the form `<version>rc.<patch>`; the first tag is `0.10.0rc1`, the second `0.10.0rc2`, and so on.
1. For this release, perform the following steps:
+ * Use the Regression Test List as a checklist to test the correctness of this release.
+ * If anything fails, record all failing cases, fix all bugs on this `release/<version>` branch, increase the patch number by one, and go back to step 2.
* Update the version information in `python/setup.py.in` and set the `istaged` field to `True`.
- * Build the Docker release image of this version and publish it to dockerhub. If this fails, fix the Docker image build problem, increase the patch number by one, and go back to step 2.
- * Build the Ubuntu Deb package of this version. If this fails, fix the Deb packaging problem, increase the patch number by one, and go back to step 2.
- * Use the Regression Test List as a checklist to test the functional correctness of the Docker image / Ubuntu package.
- * If anything fails, record all failing cases, fix all bugs on this `release/<version>` branch, increase the patch number by one, and go back to step 2.
* Build the python wheel package of this version and publish it to pypi.
* Since pypi.python.org currently enforces the [strict naming convention PEP 513](https://www.python.org/dev/peps/pep-0513), the platform-related suffix of the wheel package must be renamed before uploading with twine, e.g. change `linux_x86_64` to `manylinux1_x86_64`.
* The package names on pypi are paddlepaddle and paddlepaddle_gpu. To upload the GPU version, change name to "paddlepaddle_gpu" in build/python/setup.py and rebuild the wheel package: `python setup.py bdist_wheel`.
@@ -21,8 +19,8 @@ Each new PaddlePaddle release follows this process:
pip install twine
twine upload dist/[package to upload]
```
+ * Build the Docker release image of this version and publish it to dockerhub. If this fails, fix the Docker image build problem, increase the patch number by one, and go back to step 2.
1. After step 3 is complete, merge the `release/<version>` branch into master, tag the merge commit on master with `<version>`, then merge `master` back into `develop`, and finally delete the `release/<version>` branch.
-1. Build the Docker release image from the master branch and publish it to dockerhub. Build the Ubuntu deb package and publish it to the github release page.
1. Collaborate on writing the Release Note.
@@ -31,6 +29,30 @@ Each new PaddlePaddle release follows this process:
* Once a `release/<version>` branch is created, merging from `develop` into `release/<version>` is generally no longer allowed. This keeps the feature set of the `release/<version>` branch closed and makes it easier for testers to verify PaddlePaddle's behavior.
* While a `release/<version>` branch exists, any bugfix branch must be merged into all three branches: `master`, `develop`, and `release/<version>`.
+## Publishing wheel packages to pypi
+
+Use [PaddlePaddle CI](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview)
+to run the automated binary builds. As shown in the figure below, select the version to release (usually one CPU
+version and one GPU version), click the "..." button to the right of "run" to open the dialog shown below, and in the
+second tab (Changes) select the branch to release, here 0.11.0, then click "Run Build". After the build finishes,
+the three generated binaries, corresponding to the CAPI, `cp27m`, and `cp27mu` versions, can be found in the
+"Artifacts" dropdown on the same page. Then upload them with the `twine` tool as described above.
+
+<img src="ci_build_whl.png">
+
+* Note: the CI environment uses the Docker images from https://github.com/PaddlePaddle/buildtools as the build
+  environment in order to support more Linux distributions; these images can also be used for manual builds, and can be downloaded from https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ .
+* pypi does not allow overwriting an uploaded package, so once a wheel for a given version number is published it cannot be changed. The next wheel can only be uploaded after the version number is updated.
+
+## Publishing Docker images
+
+After the PaddlePaddle CI above finishes building the wheels, it automatically pushes Docker images to DockerHub, so
+publishing a Docker image only requires tagging the automatically pushed image with the corresponding version number:
+
+1. Go to https://hub.docker.com/r/paddlepaddle/paddle/tags/ and check that the latest tag was updated after the wheel build above finished.
+1. Run `docker pull paddlepaddle/paddle:[latest tag]`, where the latest tag can be latest, latest-gpu, etc.
+1. Run `docker tag paddlepaddle/paddle:[latest tag] paddlepaddle/paddle:[version]`
+1. Run `docker push paddlepaddle/paddle:[version]`
+
## PaddlePaddle branching conventions
PaddlePaddle development follows the [git-flow](http://nvie.com/posts/a-successful-git-branching-model/) branching model, with a few adjustments to fit github's features.
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 528e45b51099d97a1f6f0dfc971b6231f928af94..3967a40136d6493d533ef9aadd2054cc23592879 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -41,7 +41,7 @@ device_context)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
-cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute)
+cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index bf1f0471ccbfccf13cb6f74c8088da7acd68ec0b..844d98916ea5b1ffd88615825d79af37ba7d128e 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -111,7 +111,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
- VLOG(3) << op->DebugString();
+ VLOG(3) << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_);
if (FLAGS_check_nan_inf) {
for (auto& vname : op->OutputVars(true)) {
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index b9dcf16da5d5e1d10176b6c9ae6ea1be080064ae..4ef0c2523ca7d4548b9f509aa943449ca88dead1 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -73,6 +73,17 @@ void UseALL() {
UseCUDNN();
}
+static DDim GetDims(const Scope& scope, const std::string& name) {
+ Variable* var = scope.FindVar(name);
+  if (var->IsType<LoDTensor>()) {
+    return var->Get<LoDTensor>().dims();
+  } else if (var->IsType<SelectedRows>()) {
+    return var->Get<SelectedRows>().GetCompleteDims();
+ } else {
+ return DDim({-1});
+ }
+}
+
std::string OperatorBase::Input(const std::string& name) const {
auto& ins = Inputs(name);
PADDLE_ENFORCE_LE(ins.size(), 1UL,
@@ -105,7 +116,7 @@ const std::vector<std::string>& OperatorBase::Outputs(
return it->second;
}
-std::string OperatorBase::DebugString() const {
+std::string OperatorBase::DebugStringEx(const Scope* scope) const {
std::stringstream ss;
ss << "Op(" << type_ << "), inputs:{";
for (auto it = inputs_.begin(); it != inputs_.end();) {
@@ -113,6 +124,9 @@ std::string OperatorBase::DebugString() const {
ss << input.first << "[";
for (size_t i = 0; i < input.second.size(); ++i) {
ss << input.second[i];
+ if (scope) {
+ ss << "(" << GetDims(*scope, input.second[i]) << ")";
+ }
if (i != input.second.size() - 1) {
ss << ", ";
}
@@ -129,6 +143,9 @@ std::string OperatorBase::DebugString() const {
ss << output.first << "[";
for (size_t i = 0; i < output.second.size(); ++i) {
ss << output.second[i];
+ if (scope) {
+ ss << "(" << GetDims(*scope, output.second[i]) << ")";
+ }
if (i != output.second.size() - 1) {
ss << ", ";
}
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 1f5a4af58c5a9ad2fa8f4ac08ece67084b8f741a..800397c077b52d02de115b40e3fe719f3f861389 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -108,7 +108,10 @@ class OperatorBase {
return boost::get<T>(attrs_.at(name));
}
- virtual std::string DebugString() const;
+ /// if scope is not null, also show dimensions of arguments
+ virtual std::string DebugStringEx(const Scope* scope) const;
+
+ std::string DebugString() const { return DebugStringEx(nullptr); }
/// Net will call this function to Run an op.
virtual void Run(const Scope& scope, const platform::Place& place) const = 0;
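Note (reviewer sketch, not part of the patch): with the split above, `DebugString()` stays source-compatible while callers that hold a scope can opt into shapes. A minimal usage sketch, assuming the headers and registry of this tree; the helper name `LogOp` is hypothetical, and the `OpDesc`/`Scope` plumbing mirrors the executor.cc hunk above:

```
// Hedged sketch only: log one operator with and without argument dimensions.
#include "glog/logging.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"

void LogOp(const paddle::framework::OpDesc& op_desc,
           const paddle::framework::Scope& scope) {
  auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
  VLOG(3) << op->DebugString();          // argument names only, as before
  VLOG(3) << op->DebugStringEx(&scope);  // names plus DDims, e.g. "x(2, 3)";
                                         // unsupported variable types print (-1)
}
```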
diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc
index 48a51efcd25afeb499ab9389c2f27c8bd2515cd8..49e39358e81bbee64a618be88ee0fca6aa438b93 100644
--- a/paddle/inference/inference.cc
+++ b/paddle/inference/inference.cc
@@ -38,23 +38,16 @@ void InferenceEngine::LoadInferenceModel(
LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
// PicklingTools cannot parse the vector of strings correctly.
#else
- // program_desc_str
- // the inference.model is stored by following python codes:
- // inference_program = fluid.io.get_inference_program(predict)
- // model_filename = "recognize_digits_mlp.inference.model/inference.model"
- // with open(model_filename, "w") as f:
- // program_str = inference_program.desc.serialize_to_string()
- // f.write(struct.pack('q', len(program_str)))
- // f.write(program_str)
- std::string model_filename = dirname + "/inference.model";
+ std::string model_filename = dirname + "/__model__.dat";
LOG(INFO) << "loading model from " << model_filename;
- std::ifstream fs(model_filename, std::ios_base::binary);
- int64_t size = 0;
-  fs.read(reinterpret_cast<char*>(&size), sizeof(int64_t));
- LOG(INFO) << "program_desc_str's size: " << size;
+ std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
std::string program_desc_str;
- program_desc_str.resize(size);
- fs.read(&program_desc_str[0], size);
+ inputfs.seekg(0, std::ios::end);
+ program_desc_str.resize(inputfs.tellg());
+ inputfs.seekg(0, std::ios::beg);
+ LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
+ inputfs.read(&program_desc_str[0], program_desc_str.size());
+ inputfs.close();
#endif
program_ = new framework::ProgramDesc(program_desc_str);
GenerateLoadProgram(dirname);
diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc
index 78b5e2767842312722fac3509e843a05fe194559..03302f5cbf5674dca1d22a84137579090b4d5eac 100644
--- a/paddle/operators/net_op.cc
+++ b/paddle/operators/net_op.cc
@@ -56,11 +56,11 @@ void NetOp::CompleteAddOp(bool calc) {
std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs));
}
-std::string NetOp::DebugString() const {
+std::string NetOp::DebugStringEx(const framework::Scope* scope) const {
std::ostringstream os;
- os << OperatorBase::DebugString() << std::endl;
+ os << OperatorBase::DebugStringEx(scope) << std::endl;
for (auto& op : ops_) {
- std::istringstream is(op->DebugString());
+ std::istringstream is(op->DebugStringEx(scope));
for (std::string line; std::getline(is, line);) {
os << " " << line << std::endl;
}
diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h
index 85d0153b32c0ba53bfe0912fc2682c8b635ba172..b24042f5ef5822eabcada8ed9d21c552579e8064 100644
--- a/paddle/operators/net_op.h
+++ b/paddle/operators/net_op.h
@@ -106,7 +106,8 @@ class NetOp : public framework::OperatorBase {
void CompleteAddOp(bool calculate = true);
- std::string DebugString() const override;
+ std::string DebugStringEx(
+ const framework::Scope* scope = nullptr) const override;
bool IsNetOp() const override;
std::vector<std::string> OutputVars(bool has_intermediate) const override;
diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save
deleted file mode 100644
index c24308a7d0131b84c28c0a9857cce4949afb2091..0000000000000000000000000000000000000000
Binary files a/paddle/operators/tensor.save and /dev/null differ
diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt
index 8c4803b9739bb54cae89de62468a47631a5dde94..44f6d85cd1510f309595ca711de2e0f767219580 100644
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -21,10 +21,16 @@ ELSE()
set(GPU_CTX_DEPS)
ENDIF()
+IF(WITH_MKLDNN)
+ set(MKLDNN_CTX_DEPS mkldnn)
+ELSE()
+ set(MKLDNN_CTX_DEPS)
+ENDIF()
+
# memcpy depends on device_context, here add deps individually for
# avoiding cyclic dependencies
cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
- system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
+ system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS})
nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc
index 4bf643e048dcf97f7888dcc78eb1b4fa4feee046..9d9348079a0179418ab1a1474cfd8b69136f26b2 100644
--- a/paddle/platform/device_context.cc
+++ b/paddle/platform/device_context.cc
@@ -168,5 +168,69 @@ cudaStream_t CUDADeviceContext::stream() const { return stream_; }
#endif
+#ifdef PADDLE_WITH_MKLDNN
+MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
+ : CPUDeviceContext(place), ready_(false) {
+ stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
+ engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0));
+}
+
+template <typename T>
+void MKLDNNDeviceContext::AddElement(const std::string& op_key,
+ const T& value) {
+  if (GetElement<T>(op_key)) {
+ return;
+ }
+  GetElementPool<T>().emplace(op_key, std::move(value));
+}
+
+template <typename T>
+const T& MKLDNNDeviceContext::GetElement(const std::string& op_key) const {
+  auto it = GetElementPool<T>().find(op_key);
+  return it == GetElementPool<T>().end() ? nullptr : it->second;
+}
+
+template <>
+const std::unordered_map<const std::string, const MKLDNNMemoryPtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNMemoryPtr>() const {
+  return memory_pool_;
+}
+
+template <>
+const std::unordered_map<const std::string, const MKLDNNPrimitivePtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNPrimitivePtr>() const {
+  return primitive_pool_;
+}
+
+template <>
+const std::unordered_map<const std::string, const MKLDNNPrimitiveDescPtr,
+                         std::hash<std::string>>&
+MKLDNNDeviceContext::GetElementPool<MKLDNNPrimitiveDescPtr>() const {
+  return primitive_desc_pool_;
+}
+
+void MKLDNNDeviceContext::Execute(bool block) {
+ if (pipeline_.empty()) {
+ return;
+ }
+ ResetStream();
+ stream_->submit(pipeline_).wait(block);
+ ready_ = false;
+ pipeline_.clear();
+}
+
+void MKLDNNDeviceContext::ResetStream() {
+ if (ready_) {
+ return;
+ }
+  // TODO(TJ): change me when mkldnn has a specific method to reset this state
+ stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
+ ready_ = true;
+}
+
+#endif
+
} // namespace platform
} // namespace paddle
diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h
index 609ea4bd3ad50e2eefa908539339903ed1f0a807..7a0040c9c229af79ea8be1049dfd6c0d1b4d19cf 100644
--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -21,6 +21,10 @@ limitations under the License. */
#define EIGEN_USE_GPU
#endif
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/platform/mkldnn_helper.h"
+#endif
+
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "unsupported/Eigen/CXX11/Tensor"
@@ -105,6 +109,54 @@ struct DefaultDeviceContextType {
#endif
+#ifdef PADDLE_WITH_MKLDNN
+class MKLDNNDeviceContext : public CPUDeviceContext {
+ public:
+ explicit MKLDNNDeviceContext(CPUPlace place);
+
+ /* \brief Add new element: memory, primitive or primitive desc */
+  template <typename T>
+ void AddElement(const std::string& op_key, const T& value);
+
+  /* \brief Get an existing element: memory, primitive or primitive desc */
+  template <typename T>
+ const T& GetElement(const std::string& op_key) const;
+
+ /* \brief Get element pool: memory, primitive or primitive desc pool */
+  template <typename T>
+  const std::unordered_map<const std::string, const T, std::hash<std::string>>&
+  GetElementPool() const;
+
+ /* \brief Get the active engine */
+ const MKLDNNEngine& engine() const { return *engine_; }
+
+ /* \brief Submit primitive to pipeline */
+ void Submit(const MKLDNNPrimitivePtr& p) { pipeline_.push_back(*p); }
+
+ /*! \brief Execute all submitted primitives in pipeline */
+ void Execute(bool block = true);
+
+ protected:
+  /*! \brief Reset the stream to prepare the next execute */
+ void ResetStream();
+
+ private:
+  std::unordered_map<const std::string, const MKLDNNMemoryPtr,
+                     std::hash<std::string>>
+      memory_pool_;
+  std::unordered_map<const std::string, const MKLDNNPrimitivePtr,
+                     std::hash<std::string>>
+      primitive_pool_;
+  std::unordered_map<const std::string, const MKLDNNPrimitiveDescPtr,
+                     std::hash<std::string>>
+      primitive_desc_pool_;
+  std::vector<mkldnn::primitive> pipeline_;
+ MKLDNNStreamPtr stream_;
+ MKLDNNEnginePtr engine_;
+ bool ready_;
+};
+#endif
+
/*! \brief device context pool singleton */
class DeviceContextPool {
public:
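Note (reviewer sketch, not part of the patch): a minimal caller of the new context, using only the `Submit` and `Execute` members declared above; the helper name `SubmitAndRun` is hypothetical, and building the primitive itself (with `engine()`) is assumed to happen elsewhere:

```
// Hedged sketch only: queue one primitive and run the pipeline synchronously.
#include "paddle/platform/device_context.h"

namespace platform = paddle::platform;

void SubmitAndRun(platform::MKLDNNDeviceContext* dev_ctx,
                  const platform::MKLDNNPrimitivePtr& prim) {
  dev_ctx->Submit(prim);             // copies *prim into the pending pipeline
  dev_ctx->Execute(/*block=*/true);  // submits the eager stream and waits
}
```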
diff --git a/paddle/platform/mkldnn_helper.h b/paddle/platform/mkldnn_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd52a8b4c434071a030a8e7a8a70fc3adba8460c
--- /dev/null
+++ b/paddle/platform/mkldnn_helper.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <mkldnn.hpp>
+
+namespace paddle {
+namespace platform {
+
+using MKLDNNStream = mkldnn::stream;
+using MKLDNNEngine = mkldnn::engine;
+using MKLDNNMemory = mkldnn::memory;
+using MKLDNNPrimitive = mkldnn::primitive;
+using MKLDNNPrimitiveDesc = mkldnn::handle<mkldnn_primitive_desc_t>;
+
+typedef std::unique_ptr<MKLDNNStream> MKLDNNStreamPtr;
+typedef std::unique_ptr<MKLDNNEngine> MKLDNNEnginePtr;
+typedef std::unique_ptr<MKLDNNMemory> MKLDNNMemoryPtr;
+typedef std::unique_ptr<MKLDNNPrimitive> MKLDNNPrimitivePtr;
+typedef std::unique_ptr<MKLDNNPrimitiveDesc> MKLDNNPrimitiveDescPtr;
+
+} // namespace platform
+} // namespace paddle
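Note (reviewer sketch, not part of the patch): the aliases above pair with the constructions MKLDNNDeviceContext already uses in device_context.cc (CPU engine index 0, eager stream). The helper names `MakeCPUEngine` and `MakeEagerStream` are hypothetical:

```
// Hedged sketch only: create the smart-pointer aliases the same way the
// MKLDNNDeviceContext constructor in device_context.cc does.
#include "paddle/platform/mkldnn_helper.h"

namespace platform = paddle::platform;

platform::MKLDNNEnginePtr MakeCPUEngine() {
  return platform::MKLDNNEnginePtr(
      new platform::MKLDNNEngine(mkldnn::engine::cpu, 0));
}

platform::MKLDNNStreamPtr MakeEagerStream() {
  return platform::MKLDNNStreamPtr(
      new platform::MKLDNNStream(mkldnn::stream::kind::eager));
}
```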
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 92039ec6b05d224e702f0ba5dc05c057a492287e..f1e244772f3f9e636dc749a21dc965743aa642da 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -193,6 +193,25 @@ EOF
EOF
}
+function gen_capi_package() {
+ if [[ ${WITH_C_API} == "ON" ]]; then
+ install_prefix="/paddle/build/capi_output"
+ rm -rf $install_prefix
+
+ make DESTDIR="$install_prefix" install
+
+ if [[ ${WITH_MKL:-OFF} == "ON" ]]; then
+ find ./third_party/install -name 'libmklml_gnu.so' -exec cp {} $install_prefix/usr/local/lib \;
+ find ./third_party/install -name 'libmklml_intel.so' -exec cp {} $install_prefix/usr/local/lib \;
+ cp -P ./third_party/install/mkldnn/lib/* $install_prefix/usr/local/lib/
+ fi
+
+ find ./third_party/install -name 'libiomp5.so' -exec cp {} $install_prefix/usr/local/lib \;
+ cd $install_prefix/usr/local
+    ls | egrep -v "^Found.*item$" | xargs tar czf /paddle/build/paddle.tgz
+ fi
+}
+
set -xe
cmake_gen ${PYTHON_ABI:-""}
@@ -200,6 +219,11 @@ run_build
run_test
gen_docs
gen_dockerfile
-
-printf "If you need to install PaddlePaddle in develop docker image,"
-printf "please make install or pip install build/python/dist/*.whl.\n"
+gen_capi_package
+
+if [[ ${WITH_C_API:-OFF} == "ON" ]]; then
+  printf "PaddlePaddle C-API libraries were generated in build/paddle.tgz\n"
+else
+ printf "If you need to install PaddlePaddle in develop docker image,"
+ printf "please make install or pip install build/python/dist/*.whl.\n"
+fi
diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py
index ecba87191045cff6c05014010e60575741238f8d..e6f87ce61b1d16d4f98f111626776aa52c2ec35b 100644
--- a/python/paddle/trainer_config_helpers/attrs.py
+++ b/python/paddle/trainer_config_helpers/attrs.py
@@ -58,12 +58,12 @@ def is_compatible_with(x, Type):
class HookAttribute(object):
"""
- Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs
+ Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs
during training process of a layer with parameters, such as img_conv layer, fc layer.
- :param type: Hook type, currently supported types:
+ :param type: Hook type, currently supported types:
'pruning' : the user specifies a sparsity_ratio before training starts, and the
- network will prune the parameters based on the sparsity_ratio.
+ network will prune the parameters based on the sparsity_ratio.
eg: The definition of Hook object can be hk = HookAttribute('pruning', 0.6)
The specific usage can be paddle.layer.img_conv(input=img, filter_size=3,
num_channels=3, num_filters=64,
@@ -71,10 +71,10 @@ class HookAttribute(object):
The pruning details can be found https://arxiv.org/pdf/1506.02626.pdf
:type type: string
- :param sparsity_ratio: Must be specified if hook type is 'pruning',
+ :param sparsity_ratio: Must be specified if hook type is 'pruning',
it represents the ratio of the zero elements to be set by the Parameter.
:type sparsity_ratio: float or None
-
+
"""
def __init__(self, type, sparsity_ratio=None):
@@ -130,10 +130,12 @@ class ParameterAttribute(object):
:param sparse_update: Enable sparse update for this parameter. It will
enable both local and remote sparse update.
:type sparse_update: bool
+ :param update_hooks: A HookAttribute object.
+ :type update_hooks: HookAttribute
:param initializer: If not None, it should be a callable object which accepts
a parameter name and returns numpy array for the initial
value of the parameter
- :param initializer: callable object
+ :type initializer: callable object
"""
def __init__(self,
diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py
index 88fe19da5e2c2df7f7eed7b26261ec155f0013f7..66a7f737574c438a2b945bd4a49d8317bd460c80 100644
--- a/python/paddle/v2/fluid/backward.py
+++ b/python/paddle/v2/fluid/backward.py
@@ -7,7 +7,7 @@ __all__ = ['append_backward']
def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
"""
- Traverse all ops in op_descs[begin_idx : end_idx],
+ Traverse all ops in op_descs[begin_idx : end_idx],
if any op has inputs/outputs named "old_name", rename it as 'new_name'
"""
if begin_idx is None:
@@ -162,7 +162,7 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
if core.grad_var_suffix() in arg and arg in no_grad_set:
to_insert.append((_create_op_desc_("fill_zeros_like", {
"X": [_strip_grad_suffix_(arg)]
- }, {"Y": [arg]}, {}), idx))
+ }, {"Out": [arg]}, {}), idx))
map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert))
@@ -182,7 +182,7 @@ def _append_backward_ops_(target,
target(Variable): the target variable of forward pass
block(Block): the block where forward ops are
target_block(Block): the block which is going to hold new generated grad ops
- no_grad_dict(dict):
+ no_grad_dict(dict):
key(int) block index
val(set) a set of variable names. These variables have no gradient
grad_to_var(dict)(output argument):
@@ -276,8 +276,8 @@ def append_backward(loss, parameter_list=None, no_grad_set=None):
loss(Variable): The variable generated by cost function.
parameter_list(list): Parameters that need to be updated by optimizer.
If None, it means all parameters need to be updated.
- no_grad_set(set): Variables that have no gradients in Block 0.
- If None, the set will be generated inside the function and
+ no_grad_set(set): Variables that have no gradients in Block 0.
+ If None, the set will be generated inside the function and
contains all variables with `stop_gradient=True` from all blocks.
Return:
diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py
index 926327b70c70d250766c2640d808bb3e3516d37b..c63567601accd8c072368351f2838857bb61c818 100644
--- a/python/paddle/v2/fluid/io.py
+++ b/python/paddle/v2/fluid/io.py
@@ -212,6 +212,11 @@ def save_inference_model(dirname,
"fetch_var_names": fetch_var_names
}, f, -1)
+ # Save only programDesc of inference_program in binary format
+ # in another file: __model__.dat
+ with open(model_file_name + ".dat", "wb") as fp:
+ fp.write(inference_program.desc.serialize_to_string())
+
save_params(executor, dirname, main_program)
diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index 787a3153f401dc2bf9a2350fdde24a7c5aa94e89..948a67524490ec7dcf84ef9af48be54f0fc0f908 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -998,7 +998,7 @@ class ConditionalBlock(object):
out_list = [
parent_block.var(var_name) for var_name in parent_block.vars
- if var_name not in intermediate
+ if var_name in intermediate
]
step_scope = parent_block.create_var(
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 6883630ac6a28176258272996ed7d0e73652a40b..7feb479d2e3bc396eac53045888175ba54864b66 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -64,14 +64,14 @@ def fc(input,
is flattened: the first `num_flatten_dims`
dimensions will be flattened to form the first
dimension of the final matrix (height of the
- matrix), and the rest `rank(X) - num_col_dims`
+ matrix), and the rest `rank(X) - num_flatten_dims`
dimensions are flattened to form the second
dimension of the final matrix (width of the matrix).
For example, suppose `X` is a 6-dimensional tensor
with a shape [2, 3, 4, 5, 6], and
- `x_num_col_dims` = 3. Then, the flattened matrix
+ `num_flatten_dims` = 3. Then, the flattened matrix
will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
- By default, `x_num_col_dims` is set to 1.
+ By default, `num_flatten_dims` is set to 1.
param_attr(ParamAttr|list): The parameter attribute for learnable
parameters/weights of the fully connected
layer.
@@ -243,18 +243,21 @@ def gru_unit(input,
r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r)
- ch_t & = actNode(xc_t + W_c dot(r_t, h_{t-1}) + b_c)
+ m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m)
- h_t & = dot((1-u_t), ch_{t-1}) + dot(u_t, h_t)
+ h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
The inputs of gru unit include :math:`z_t`, :math:`h_{t-1}`. In terms
of the equation above, the :math:`z_t` is split into 3 parts -
- :math:`xu_t`, :math:`xr_t` and :math:`xc_t`. This means that in order to
+ :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
implement a full GRU unit operator for an input, a fully
connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
- This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t - 1})`
- and concatenation of :math:`u_t`, :math:`r_t` and :math:`ch_t`.
+ The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
+    of the GRU cell. Unlike the LSTM, the GRU has one fewer gate. However, there is
+ an intermediate candidate hidden output, which is denoted by :math:`m_t`.
+ This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
+ and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
Args:
input (Variable): The fc transformed input value of current step.