diff --git a/Dockerfile b/Dockerfile
index 402adee2ea2822250ebc8f6229fd6a44545d58e5..634be18a51bf61e96a8bf6f263b6674a7932d6e4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
-RUN wget -qO- http://paddlepaddledeps.bj.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
+RUN wget -qO- http://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
tar -xz -C /usr/local && \
cp -rf /usr/local/TensorRT/include /usr && \
cp -rf /usr/local/TensorRT/lib /usr
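For context, the RUN step above streams the TensorRT tarball from the new CDN host straight into tar. A rough Python equivalent of that download-and-extract step (the URL is taken from the diff and may no longer be served; the destination is the same /usr/local used above):

```python
import tarfile
import urllib.request

# URL taken from the Dockerfile change above; it may no longer be available.
URL = ("http://paddlepaddledeps.cdn.bcebos.com/"
       "TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz")

def fetch_tensorrt(dest="/usr/local"):
    # Stream the gzipped tarball and unpack it, mirroring
    # `wget -qO- ... | tar -xz -C /usr/local`.
    with urllib.request.urlopen(URL) as resp:
        with tarfile.open(fileobj=resp, mode="r|gz") as tar:
            tar.extractall(path=dest)

if __name__ == "__main__":
    fetch_tensorrt()
```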
diff --git a/README.md b/README.md
index a67cb8ad439f462c361cb6bac2449c3a4b042126..60ffbe728178705b1734e682868614025214c2a4 100644
--- a/README.md
+++ b/README.md
@@ -76,33 +76,26 @@ pip install paddlepaddle-gpu==0.14.0.post85
## Installation
-It is recommended to check out the
-[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/docker_install_en.html)
-before looking into the
-[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html).
+It is recommended to read [the installation guide](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/install/install_doc.html) on our website.
## Documentation
-We provide [English](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) and
-[Chinese](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/0.14.0/getstarted/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/index.html) documentation.
-- [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html)
+- [Deep Learning 101](https://github.com/PaddlePaddle/book)
You might want to start from this online interactive book that can run in a Jupyter Notebook.
-- [Distributed Training](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/cluster/index_en.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/user_guides/howto/training/cluster_howto.html)
You can run distributed training jobs on MPI clusters.
-- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/cluster/multi_cluster/k8s_en.html)
-
- You can also run distributed training jobs on Kubernetes clusters.
-
-- [Python API](http://www.paddlepaddle.org/docs/develop/api/en/overview.html)
+- [Python API](http://paddlepaddle.org/documentation/api/zh/0.14.0/fluid.html)
Our new API enables much shorter programs.
-- [How to Contribute](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/dev/contribute_to_paddle_en.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/advanced_usage/development/contribute_to_paddle.html)
We appreciate your contributions!
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index b520c03a836a9e3f263ba050f151877ffe0d071d..03c73786a6c31868b1893bfcb319e43e37db1a3d 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -169,14 +169,19 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
+if (NOT WIN32) # windows msvc2015 supports c++11 natively.
+# -std=c++11 and -fPIC are not recognized by msvc; -Xcompiler will be added by cmake.
list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
-list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+endif(NOT WIN32)
+
+list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
# in cuda9, suppress cuda warning on eigen
list(APPEND CUDA_NVCC_FLAGS "-w")
# Set :expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")
+if (NOT WIN32)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
@@ -187,6 +192,13 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
# nvcc 9 does not support -Os. Use Release flags instead
list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
endif()
+else(NOT WIN32)
+if(CMAKE_BUILD_TYPE STREQUAL "Release")
+ list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
+else()
+  message(FATAL_ERROR "Windows only supports Release builds for now. Please set the Visual Studio build type to Release, x64.")
+endif()
+endif(NOT WIN32)
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
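To summarize the conditional logic introduced above: on non-Windows hosts nvcc still receives `-std=c++11` and `-Xcompiler -fPIC` plus the CMake build-type flags, while on Windows only a Release build is accepted and nvcc gets `-O3 -DNDEBUG`. A small Python sketch of that decision, as an illustration only (the real logic lives in cmake/cuda.cmake above):

```python
def nvcc_flags(is_windows, build_type, cxx_flags_for_type=None):
    """Mirror the flag selection in cmake/cuda.cmake after this change."""
    flags = []
    if not is_windows:
        # MSVC does not understand these; they are only added on non-Windows hosts.
        flags += ["-std=c++11", "-Xcompiler -fPIC"]
    flags += ["--use_fast_math", "-w", "--expt-relaxed-constexpr"]
    if not is_windows:
        # Reuse whatever CMAKE_CXX_FLAGS_<TYPE> holds (MinSizeRel falls back to Release flags).
        flags += cxx_flags_for_type or []
    else:
        if build_type != "Release":
            raise RuntimeError("Windows only supports Release builds; set the "
                               "Visual Studio build type to Release, x64.")
        flags += ["-O3", "-DNDEBUG"]
    return flags

print(nvcc_flags(is_windows=True, build_type="Release"))
```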
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index 7fb67afbe15a5a019c978092d5ba3a4a0f66d996..fd9835d023c67b76579913f2ec56c2444fea8c15 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -44,7 +44,7 @@ ExternalProject_Add(
# 3. keep only zlib, cares, protobuf, boringssl under "third_party",
# checkout and clean other dirs under third_party
# 4. remove .git, and package the directory.
- URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz"
+ URL "http://paddlepaddledeps.cdn.bcebos.com/grpc-v1.10.x.tar.gz"
URL_MD5 "1f268a2aff6759839dccd256adcc91cf"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index bc36683a9facc253e7b9feb0c5a56e79491fb9b0..f61770514eb05a99c140cdb18575c89aa5235c14 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
set(module "framework")
if (NOT WIN32)
-copy(framework_lib DEPS framework_py_proto
- SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
- DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
-)
-else()
-copy(framework_lib
+set(framework_lib_deps framework_py_proto)
+endif(NOT WIN32)
+copy(framework_lib DEPS ${framework_lib_deps}
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
- DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
+ ${src_dir}/${module}/ir/*.h
+ DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
)
-endif(NOT WIN32)
set(module "memory")
copy(memory_lib
@@ -161,7 +158,8 @@ set(module "inference")
copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci
- DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
+ ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
+ DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
)
set(module "platform")
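The copy() rules above pair each SRCS pattern with the DSTS entry at the same position, which is why adding `${src_dir}/${module}/ir/*.h` and `paddle_inference_pass.h` to SRCS also requires a matching extra entry in each DSTS list. A hedged Python sketch of that pairing (copy() itself is a CMake helper defined elsewhere in the Paddle build; the function and paths below are only an illustration):

```python
import glob
import os
import shutil

def copy(srcs, dsts):
    # Each SRCS pattern maps to the DSTS entry at the same index,
    # so the two lists must stay the same length.
    assert len(srcs) == len(dsts), "SRCS and DSTS must be paired one-to-one"
    for pattern, dst in zip(srcs, dsts):
        os.makedirs(dst, exist_ok=True)
        for path in glob.glob(pattern):
            shutil.copy(path, dst)

# Hypothetical call mirroring the framework_lib rule above.
copy(["paddle/fluid/framework/*.h",
      "paddle/fluid/framework/details/*.h",
      "build/paddle/fluid/framework/framework.pb.h",
      "paddle/fluid/framework/ir/*.h"],
     ["dist/framework", "dist/framework/details", "dist/framework", "dist/framework/ir"])
```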
diff --git a/doc/fluid/new_docs/advanced_usage/deploy/index_anakin.rst b/doc/fluid/new_docs/advanced_usage/deploy/index_anakin.rst
index b782242a6632a5d42a512cf3b830d6e047c064ab..e4682ccb94e6fc60e184632dff9ee16a6bf16ec0 100644
--- a/doc/fluid/new_docs/advanced_usage/deploy/index_anakin.rst
+++ b/doc/fluid/new_docs/advanced_usage/deploy/index_anakin.rst
@@ -1,5 +1,5 @@
-服务器端部署 - Anakin
-#####################
+Anakin - 服务器端加速引擎
+#######################
使用文档
diff --git a/doc/fluid/new_docs/advanced_usage/deploy/index_native.rst b/doc/fluid/new_docs/advanced_usage/deploy/index_native.rst
deleted file mode 100644
index a5209e8560b31e9f0f776fba9a2b8c5bc150165c..0000000000000000000000000000000000000000
--- a/doc/fluid/new_docs/advanced_usage/deploy/index_native.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-服务器端部署 - 原生引擎
-#######################
-
-.. toctree::
- :maxdepth: 2
-
- build_and_install_lib_cn.rst
- native_infer.rst
diff --git a/doc/fluid/new_docs/advanced_usage/index.rst b/doc/fluid/new_docs/advanced_usage/index.rst
index dea7c236619a0bdbf402f371571d947d1cdbba65..89166573eebca045e948046c69f3b7a3e0031d58 100644
--- a/doc/fluid/new_docs/advanced_usage/index.rst
+++ b/doc/fluid/new_docs/advanced_usage/index.rst
@@ -10,7 +10,6 @@
.. toctree::
:maxdepth: 2
- deploy/index_native.rst
deploy/index_anakin.rst
deploy/index_mobile.rst
development/contribute_to_paddle.md
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/image_classification/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..dc7c62b06287ad333dd41082e566b0553d3a5341
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/image_classification/.gitignore
@@ -0,0 +1,8 @@
+*.pyc
+train.log
+output
+data/cifar-10-batches-py/
+data/cifar-10-python.tar.gz
+data/*.txt
+data/*.list
+data/mean.meta
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/image_classification/README.cn.md
index 8d645718e12e4d976a8e71de105e11f495191fbf..4f20843596aa676962a36241f59560ec2a41257b 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/image_classification/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/image_classification/README.cn.md
@@ -21,7 +21,7 @@
图像分类包括通用图像分类、细粒度图像分类等。图1展示了通用图像分类效果,即模型可以正确识别图像上的主要物体。
-
+
图1. 通用图像分类展示
@@ -30,7 +30,7 @@
-
+
图2. 细粒度图像分类展示
@@ -38,7 +38,7 @@
一个好的模型既要对不同类别识别正确,同时也应该能够对不同视角、光照、背景、变形或部分遮挡的图像正确识别(这里我们统一称作图像扰动)。图3展示了一些图像的扰动,较好的模型会像聪明的人类一样能够正确识别。
-
+
图3. 扰动图片展示[22]
@@ -61,7 +61,7 @@
Alex Krizhevsky在2012年ILSVRC提出的CNN模型 \[[9](#参考文献)\] 取得了历史性的突破,效果大幅度超越传统方法,获得了ILSVRC2012冠军,该模型被称作AlexNet。这也是首次将深度学习用于大规模图像分类中。从AlexNet之后,涌现了一系列CNN模型,不断地在ImageNet上刷新成绩,如图4展示。随着模型变得越来越深以及精妙的结构设计,Top-5的错误率也越来越低,降到了3.5%附近。而在同样的ImageNet数据集上,人眼的辨识错误率大概在5.1%,也就是目前的深度学习模型的识别能力已经超过了人眼。
-
+
图4. ILSVRC图像分类Top-5错误率
@@ -70,7 +70,7 @@ Alex Krizhevsky在2012年ILSVRC提出的CNN模型 \[[9](#参考文献)\] 取得
传统CNN包含卷积层、全连接层等组件,并采用softmax多类别分类器和多类交叉熵损失函数,一个典型的卷积神经网络如图5所示,我们先介绍用来构造CNN的常见组件。
-
+
图5. CNN网络示例[20]
@@ -89,7 +89,7 @@ Alex Krizhevsky在2012年ILSVRC提出的CNN模型 \[[9](#参考文献)\] 取得
牛津大学VGG(Visual Geometry Group)组在2014年ILSVRC提出的模型被称作VGG模型 \[[11](#参考文献)\] 。该模型相比以往模型进一步加宽和加深了网络结构,它的核心是五组卷积操作,每两组之间做Max-Pooling空间降维。同一组内采用多次连续的3X3卷积,卷积核的数目由较浅组的64增多到最深组的512,同一组内的卷积核数目是一样的。卷积之后接两层全连接层,之后是分类层。由于每组内卷积层的不同,有11、13、16、19层这几种模型,下图展示一个16层的网络结构。VGG模型结构相对简洁,提出之后也有很多文章基于此模型进行研究,如在ImageNet上首次公开超过人眼识别的模型\[[19](#参考文献)\]就是借鉴VGG模型的结构。
-
+
图6. 基于ImageNet的VGG16模型
@@ -106,7 +106,7 @@ NIN模型主要有两个特点:
Inception模块如下图7所示,图(a)是最简单的设计,输出是3个卷积层和一个池化层的特征拼接。这种设计的缺点是池化层不会改变特征通道数,拼接后会导致特征的通道数较大,经过几层这样的模块堆积后,通道数会越来越大,导致参数和计算量也随之增大。为了改善这个缺点,图(b)引入3个1x1卷积层进行降维,所谓的降维就是减少通道数,同时如NIN模型中提到的1x1卷积也可以修正线性特征。
-
+
图7. Inception模块
@@ -115,7 +115,7 @@ GoogleNet由多组Inception模块堆积而成。另外,在网络最后也没
GoogleNet整体网络结构如图8所示,总共22层网络:开始由3层普通的卷积组成;接下来由三组子网络组成,第一组子网络包含2个Inception模块,第二组包含5个Inception模块,第三组包含2个Inception模块;然后接均值池化层、全连接层。
-
+
图8. GoogleNet[12]
@@ -130,14 +130,14 @@ ResNet(Residual Network) \[[15](#参考文献)\] 是2015年ImageNet图像分类
残差模块如图9所示,左边是基本模块连接方式,由两个输出通道数相同的3x3卷积组成。右边是瓶颈模块(Bottleneck)连接方式,之所以称为瓶颈,是因为上面的1x1卷积用来降维(图示例即256->64),下面的1x1卷积用来升维(图示例即64->256),这样中间3x3卷积的输入和输出通道数都较小(图示例即64->64)。
-
+
图9. 残差模块
图10展示了50、101、152层网络连接示意图,使用的是瓶颈模块。这三个模型的区别在于每组中残差模块的重复次数不同(见图右上角)。ResNet训练收敛较快,成功的训练了上百乃至近千层的卷积神经网络。
-
+
图10. 基于ImageNet的ResNet模型
@@ -149,7 +149,7 @@ ResNet(Residual Network) \[[15](#参考文献)\] 是2015年ImageNet图像分类
由于ImageNet数据集较大,下载和训练较慢,为了方便大家学习,我们使用[CIFAR10]()数据集。CIFAR10数据集包含60,000张32x32的彩色图片,10个类别,每个类包含6,000张。其中50,000张图片作为训练集,10000张作为测试集。图11从每个类别中随机抽取了10张图片,展示了所有的类别。
-
+
图11. CIFAR10数据集[21]
@@ -377,7 +377,7 @@ test_reader = paddle.batch(
`event_handler_plot`可以用来利用回调数据来打点画图:
-
+
图12. 训练结果
@@ -469,7 +469,7 @@ Test with Pass 0, Loss 1.1, Acc 0.6
图13是训练的分类错误率曲线图,运行到第200个pass后基本收敛,最终得到测试集上分类错误率为8.54%。
-
+
图13. CIFAR10数据集上VGG模型的分类错误率
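The VGG description in the hunks above (five groups of 3x3 convolutions with max-pooling between groups, channel counts growing from 64 to 512) can be written down compactly. A small sketch of the standard VGG-16 layer plan, just to make the "16 layers" count concrete:

```python
# Standard VGG-16 configuration: (number of 3x3 conv layers, output channels) per group,
# with a 2x2 max-pool after each group; 13 conv layers + 3 fully connected = 16 weight layers.
VGG16_GROUPS = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

conv_layers = sum(n for n, _ in VGG16_GROUPS)
print("conv layers:", conv_layers, "-> total weight layers:", conv_layers + 3)
```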
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/cifar.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/cifar.png
deleted file mode 100644
index f3c5f2f7b0c84f83382b70124dcd439586ed4eb0..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/cifar.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog.png
deleted file mode 100644
index ca8f858a902ea723d886d2b88c2c0a1005301c50..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog_cat.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog_cat.png
deleted file mode 100644
index 38b21f21604b1bb84fc3f6aa96bd5fce45d15a55..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/dog_cat.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/fea_conv0.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/fea_conv0.png
deleted file mode 100644
index 647c822e52cd55d50e5f207978f5e6ada86cf34c..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/fea_conv0.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/flowers.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/flowers.png
deleted file mode 100644
index 04245cef60fe7126ae4c92ba8085273965078bee..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/flowers.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/googlenet.jpeg b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/googlenet.jpeg
deleted file mode 100644
index 249dbf96df61c3352ea5bd80470f6c4a1e03ff10..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/googlenet.jpeg and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/ilsvrc.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/ilsvrc.png
deleted file mode 100644
index 4660ac122e9d533023a21154d35eee29e3b08d27..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/ilsvrc.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception.png
deleted file mode 100644
index 9591a0c1e8c0165c40ca560be35a7b9a91cd5027..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception_en.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception_en.png
deleted file mode 100644
index 39580c20b583f2a15d17fd124a572c84e6e2db1d..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/inception_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet.png
deleted file mode 100644
index 77f785e03bacd38c4c64a817874a58ff3298d2f3..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet_en.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet_en.png
deleted file mode 100644
index 97a1e3eee45c0db95e6a943ca3b8c0cf6c34d4b6..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/lenet_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot.png
deleted file mode 100644
index 57e45cc0c27dd99b9918de2ff1228bc6b65f7424..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot_en.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot_en.png
deleted file mode 100644
index 147e575bf49086811c43420d5a9c8f749e2da405..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/plot_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet.png
deleted file mode 100644
index 0aeb4f254639fdbf18e916dc219ca61602596d85..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet_block.jpg b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet_block.jpg
deleted file mode 100644
index c500eb01a90190ff66150871fe83ec275e2de8d7..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/resnet_block.jpg and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/train_and_test.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/train_and_test.png
deleted file mode 100644
index c6336a9a69b95dc978719ce68896e3e752e67fed..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/train_and_test.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations.png
deleted file mode 100644
index b4ebbbe6a50f5fd7cd0cccb52cdac5653e34654c..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations_en.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations_en.png
deleted file mode 100644
index 88c60fe87f802c5ce560bb15bbdbd229aeafc4e4..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/variations_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/vgg16.png b/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/vgg16.png
deleted file mode 100644
index 6270eefcfd7071bc1643ee06567e5b81aaf4c177..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/image_classification/image/vgg16.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/index.rst b/doc/fluid/new_docs/beginners_guide/basics/index.rst
index e1fd226116d88fbf137741242b304b367e598ba5..0fcb008e0a7773e81e5124da09fe07366130b924 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/index.rst
+++ b/doc/fluid/new_docs/beginners_guide/basics/index.rst
@@ -6,7 +6,7 @@
.. todo::
概述
-
+
.. toctree::
:maxdepth: 2
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..29b5622a53a1b0847e9f53febf1cc50dcf4f044a
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/.gitignore
@@ -0,0 +1,12 @@
+data/train.list
+data/test.*
+data/conll05st-release.tar.gz
+data/conll05st-release
+data/predicate_dict
+data/label_dict
+data/word_dict
+data/emb
+data/feature
+output
+predict.res
+train.log
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/README.cn.md
index 47e948bd1ffc0ca692dc9899193e94831ce4234b..0891f5b6b16a1b715b44db6c47ba079adfcad4c5 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/README.cn.md
@@ -21,7 +21,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
5. 对第4步的结果,通过多分类得到论元的语义角色标签。可以看到,句法分析是基础,并且后续步骤常常会构造的一些人工特征,这些特征往往也来自句法分析。
-
+
图1. 依存句法分析句法树示例
@@ -30,7 +30,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
我们继续以上面的这句话为例,图1展示了BIO表示方法。
-
+
图2. BIO标注方法示例
@@ -53,7 +53,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
图3是最终得到的栈式循环神经网络结构示意图。
-
+
图3. 基于LSTM的栈式循环神经网络结构示意图
@@ -64,7 +64,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
为了克服这一缺陷,我们可以设计一种双向循环网络单元,它的思想简单且直接:对上一节的栈式循环神经网络进行一个小小的修改,堆叠多个LSTM单元,让每一层LSTM单元分别以:正向、反向、正向 …… 的顺序学习上一层的输出序列。于是,从第2层开始,$t$时刻我们的LSTM单元便总是可以看到历史和未来的信息。图4是基于LSTM的双向循环神经网络结构示意图。
-
+
图4. 基于LSTM的双向循环神经网络结构示意图
@@ -79,7 +79,7 @@ CRF是一种概率化结构模型,可以看作是一个概率无向图模型
序列标注任务只需要考虑输入和输出都是一个线性序列,并且由于我们只是将输入序列作为条件,不做任何条件独立假设,因此输入序列的元素之间并不存在图结构。综上,在序列标注任务中使用的是如图5所示的定义在链式图上的CRF,称之为线性链条件随机场(Linear Chain Conditional Random Field)。
-
+
图5. 序列标注任务中使用的线性链条件随机场
@@ -123,7 +123,7 @@ $$\DeclareMathOperator*{\argmax}{arg\,max} L(\lambda, D) = - \text{log}\left(\pr
4. CRF以第3步中LSTM学习到的特征为输入,以标记序列为监督信号,完成序列标注;
-
+
图6. SRL任务上的深层双向LSTM模型
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm.png
deleted file mode 100644
index e63f5ebd6d00f2e4ecf97b9ab2027e74683013f2..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm_en.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm_en.png
deleted file mode 100644
index f0a195c24d9ee493f96bb93c28a99e70566be7a4..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bidirectional_stacked_lstm_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example.png
deleted file mode 100644
index e5f7151c9fcc50a7cf7af485cbbc7e4fccab0c20..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example_en.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example_en.png
deleted file mode 100644
index 93b44dd4874402ef29ad7bd7d94147609b92e309..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/bio_example_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network.png
deleted file mode 100644
index 592f7ee23bdc88a9a35059612e5ab880bbc9d34b..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network_en.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network_en.png
deleted file mode 100644
index c3646312e48db977402fb353dc0c9b4d02269bf4..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/db_lstm_network_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing.png
deleted file mode 100644
index 9265b671735940ed6549e2980064d2ce08baae64..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing_en.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing_en.png
deleted file mode 100644
index 23f4f45b603e3d60702af2b2464d10fc8deed061..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/dependency_parsing_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/linear_chain_crf.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/linear_chain_crf.png
deleted file mode 100644
index 0778fda74b2ad22ce4b631791a7b028cdef780a5..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/linear_chain_crf.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm.png
deleted file mode 100644
index 3d2914c726b5f4c46e66dfa85d4e88649fede6b3..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm_en.png b/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm_en.png
deleted file mode 100644
index 0b944ef91e8b5ba4b14d2a35bd8879f261cf8f61..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/label_semantic_roles/image/stacked_lstm_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..6129b9e8645010fcb8372d9dc3dbb568dfa80907
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/.gitignore
@@ -0,0 +1,9 @@
+data/wmt14
+data/pre-wmt14
+pretrained/wmt14_model
+gen.log
+gen_result
+train.log
+dataprovider_copy_1.py
+*.pyc
+multi-bleu.perl
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
index f37c559921483a3d7c619ed74903df56b0584bd5..6e5f77fec8a894c390ced8c93ee344fd8d27370e 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
@@ -11,10 +11,10 @@
为解决以上问题,统计机器翻译(Statistical Machine Translation, SMT)技术应运而生。在统计机器翻译技术中,转化规则是由机器自动从大规模的语料中学习得到的,而非我们人主动提供规则。因此,它克服了基于规则的翻译系统所面临的知识获取瓶颈的问题,但仍然存在许多挑战:1)人为设计许多特征(feature),但永远无法覆盖所有的语言现象;2)难以利用全局的特征;3)依赖于许多预处理环节,如词语对齐、分词或符号化(tokenization)、规则抽取、句法分析等,而每个环节的错误会逐步累积,对翻译的影响也越来越大。
近年来,深度学习技术的发展为解决上述挑战提供了新的思路。将深度学习应用于机器翻译任务的方法大致分为两类:1)仍以统计机器翻译系统为框架,只是利用神经网络来改进其中的关键模块,如语言模型、调序模型等(见图1的左半部分);2)不再以统计机器翻译系统为框架,而是直接用神经网络将源语言映射到目标语言,即端到端的神经网络机器翻译(End-to-End Neural Machine Translation, End-to-End NMT)(见图1的右半部分),简称为NMT模型。
-![nmt](./image/nmt.png)
-
+
+
图1. 基于神经网络的机器翻译系统
-
+
本教程主要介绍NMT模型,以及如何用PaddlePaddle来训练一个NMT模型。
@@ -45,19 +45,22 @@
具体来说,该双向循环神经网络分别在时间维以顺序和逆序——即前向(forward)和后向(backward)——依次处理输入序列,并将每个时间步RNN的输出拼接成为最终的输出层。这样每个时间步的输出节点,都包含了输入序列中当前时刻完整的过去和未来的上下文信息。下图展示的是一个按时间步展开的双向循环神经网络。该网络包含一个前向和一个后向RNN,其中有六个权重矩阵:输入到前向隐层和后向隐层的权重矩阵(`$W_1, W_3$`),隐层到隐层自己的权重矩阵(`$W_2,W_5$`),前向隐层和后向隐层到输出层的权重矩阵(`$W_4, W_6$`)。注意,该网络的前向隐层和后向隐层之间没有连接。
-![bi_rnn](./image/bi_rnn.png)
-
-图3. 按时间步展开的双向循环神经网络
-
+
+
+
+图2. 按时间步展开的双向循环神经网络
+
### 编码器-解码器框架
编码器-解码器(Encoder-Decoder)\[[2](#参考文献)\]框架用于解决由一个任意长度的源序列到另一个任意长度的目标序列的变换问题。即编码阶段将整个源序列编码成一个向量,解码阶段通过最大化预测序列概率,从中解码出整个目标序列。编码和解码的过程通常都使用RNN实现。
![encoder_decoder](./image/encoder_decoder.png)
-
-图4. 编码器-解码器框架
-
+
+
+图3. 编码器-解码器框架
+
+
#### 编码器
编码阶段分为三步:
@@ -69,19 +72,17 @@
3. 用RNN编码源语言词序列:这一过程的计算公式为`$h_i=\varnothing _\theta \left ( h_{i-1}, s_i \right )$`,其中`$h_0$`是一个全零的向量,`$\varnothing _\theta$`是一个非线性激活函数,最后得到的`$\mathbf{h}=\left \{ h_1,..., h_T \right \}$`就是RNN依次读入源语言`$T$`个词的状态编码序列。整句话的向量表示可以采用`$\mathbf{h}$`在最后一个时间步`$T$`的状态编码,或使用时间维上的池化(pooling)结果。
第3步也可以使用双向循环神经网络实现更复杂的句编码表示,具体可以用双向GRU实现。前向GRU按照词序列`$(x_1,x_2,...,x_T)$`的顺序依次编码源语言端词,并得到一系列隐层状态`$(\overrightarrow{h_1},\overrightarrow{h_2},...,\overrightarrow{h_T})$`。类似的,后向GRU按照`$(x_T,x_{T-1},...,x_1)$`的顺序依次编码源语言端词,得到`$(\overleftarrow{h_1},\overleftarrow{h_2},...,\overleftarrow{h_T})$`。最后对于词`$x_i$`,通过拼接两个GRU的结果得到它的隐层状态,即`$h_i=\left [ \overrightarrow{h_i^T},\overleftarrow{h_i^T} \right ]^{T}$`。
-
-![encoder_attention](./image/encoder_attention.png)
-
-图5. 使用双向GRU的编码器
-
+
+
+图4. 使用双向GRU的编码器
+
#### 解码器
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
-
1. 每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)`$c$`、真实目标语言序列的第`$i$`个词`$u_i$`和`$i$`时刻RNN的隐层状态`$z_i$`,计算出下一个隐层状态`$z_{i+1}$`。计算公式如下:
$$z_{i+1}=\phi_{\theta '} \left ( c,u_i,z_i \right )$$
-其中`$\phi _{\theta '}$`是一个非线性激活函数;`$c=q\mathbf{h}$`是源语言句子的上下文向量,在不使用[注意力机制](#注意力机制)时,如果[编码器](#编码器)的输出是源语言句子编码后的最后一个元素,则可以定义`$c=h_T$`;`$u_i$`是目标语言序列的第`$i$`个单词,`$u_0$`是目标语言序列的开始标记``,表示解码开始;`$z_i$`是`$i$`时刻解码RNN的隐层状态,`$z_0$`是一个全零的向量。
+其中`$\phi _{\theta '}$`是一个非线性激活函数;`$c=q\mathbf{h}$`是源语言句子的上下文向量,在不使用注意力机制时,如果[编码器](#编码器)的输出是源语言句子编码后的最后一个元素,则可以定义`$c=h_T$`;`$u_i$`是目标语言序列的第`$i$`个单词,`$u_0$`是目标语言序列的开始标记``,表示解码开始;`$z_i$`是`$i$`时刻解码RNN的隐层状态,`$z_0$`是一个全零的向量。
2. 将`$z_{i+1}$`通过`softmax`归一化,得到目标语言序列的第`$i+1$`个单词的概率分布`$p_{i+1}$`。概率分布公式如下:
$$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见[柱搜索算法](#柱搜索算法)。
+
### 柱搜索算法
柱搜索([beam search](http://en.wikipedia.org/wiki/Beam_search))是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“`你好`”,就算目标语言字典中只有3个词(``, ``, `hello`),也可能生成无限句话(`hello`循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
@@ -100,7 +102,6 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
柱搜索算法使用广度优先策略建立搜索树,在树的每一层,按照启发代价(heuristic cost)(本教程中,为生成词的log概率之和)对节点进行排序,然后仅留下预先确定的个数(文献中通常称为beam width、beam size、柱宽度等)的节点。只有这些节点会在下一层继续扩展,其他节点就被剪掉了,也就是说保留了质量较高的节点,剪枝了质量较差的节点。因此,搜索所占用的空间和时间大幅减少,但缺点是无法保证一定获得最优解。
使用柱搜索算法的解码阶段,目标是最大化生成序列的概率。思路是:
-
1. 每一个时刻,根据源语言句子的编码信息`$c$`、生成的第`$i$`个目标语言序列单词`$u_i$`和`$i$`时刻RNN的隐层状态`$z_i$`,计算出下一个隐层状态`$z_{i+1}$`。
2. 将`$z_{i+1}$`通过`softmax`归一化,得到目标语言序列的第`$i+1$`个单词的概率分布`$p_{i+1}$`。
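The beam-search procedure described above (expand each hypothesis, score by the sum of word log-probabilities, and keep only the top `beam_size` candidates at every step) can be sketched independently of the NMT model. A minimal sketch over a hypothetical `next_logprobs(prefix)` function, using made-up start/end markers `<s>`/`<e>` and a toy distribution:

```python
import heapq
import math

def beam_search(next_logprobs, beam_size=3, max_len=10, eos="<e>"):
    # Each hypothesis is (total_logprob, tokens); start from the start marker only.
    beam = [(0.0, ["<s>"])]
    for _ in range(max_len):
        candidates = []
        for score, tokens in beam:
            if tokens[-1] == eos:            # finished hypotheses are kept as-is
                candidates.append((score, tokens))
                continue
            for word, logp in next_logprobs(tokens).items():
                candidates.append((score + logp, tokens + [word]))
        # Keep only the beam_size highest-scoring hypotheses; prune the rest.
        beam = heapq.nlargest(beam_size, candidates, key=lambda c: c[0])
    return max(beam, key=lambda c: c[0])

# Toy next-word distribution: always prefers "hello", then the end marker.
def toy_next_logprobs(tokens):
    return {"hello": math.log(0.6), "<e>": math.log(0.3), "<s>": math.log(0.1)}

print(beam_search(toy_next_logprobs, beam_size=2, max_len=5))
```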
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn.png
deleted file mode 100644
index 9d8efd50a49d0305586f550344472ab94c93bed3..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn_en.png
deleted file mode 100644
index 4b35c88fc8ea2c503473c0c15711744e784d6af6..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/bi_rnn_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention.png
deleted file mode 100644
index 1b355e7786d25487a3f564af758c2c52c43b4690..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention_en.png
deleted file mode 100644
index 3728f782ee09d9308d02b42305027b2735467ead..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/decoder_attention_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention.png
deleted file mode 100644
index 28d7a15a3bd65262bde22a3f41b5aa78b46b368a..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention_en.png
deleted file mode 100644
index ea8585565da1ecaf241654c278c6f9b15e283286..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_attention_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder.png
deleted file mode 100644
index 60aee0017de73f462e35708b1055aff8992c03e1..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder_en.png
deleted file mode 100644
index 6b73798fe632e0873b35c117b86f347c8cf3116a..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/encoder_decoder_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru.png
deleted file mode 100644
index 0cde685b84106650a4df18ce335a23e6338d3d11..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru_en.png
deleted file mode 100644
index a6af429f23f0f7e82650139bbd8dcbef27a34abe..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/gru_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt.png
deleted file mode 100644
index bf56d73ebf297fadf522389c7b6836dd379aa097..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt_en.png b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt_en.png
deleted file mode 100644
index 557310e044b2b6687e5ea6895417ed946ac7bc11..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/image/nmt_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f23901aeb3a9e7cd12611fc556742670d04a9bb5
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/.gitignore
@@ -0,0 +1,2 @@
+.idea
+.ipynb_checkpoints
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/README.cn.md
index 0f7c97021f8ad463fc51ed169604b789ea068c3d..4b79e62f74e587fcd939d9f9e911af80992ea6a3 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/README.cn.md
@@ -37,7 +37,7 @@ Prediction Score is 4.25
YouTube是世界上最大的视频上传、分享和发现网站,YouTube推荐系统为超过10亿用户从不断增长的视频库中推荐个性化的内容。整个系统由两个神经网络组成:候选生成网络和排序网络。候选生成网络从百万量级的视频库中生成上百个候选,排序网络对候选进行打分排序,输出排名最高的数十个结果。系统结构如图1所示:
-
+
图1. YouTube 推荐系统结构
@@ -48,7 +48,7 @@ YouTube是世界上最大的视频上传、分享和发现网站,YouTube推荐
首先,将观看历史及搜索词记录这类历史信息,映射为向量后取平均值得到定长表示;同时,输入人口学特征以优化新用户的推荐效果,并将二值特征和连续特征归一化处理到[0, 1]范围。接下来,将所有特征表示拼接为一个向量,并输入给非线形多层感知器(MLP,详见[识别数字](https://github.com/PaddlePaddle/book/blob/develop/02.recognize_digits/README.cn.md)教程)处理。最后,训练时将MLP的输出给softmax做分类,预测时计算用户的综合特征(MLP的输出)与所有视频的相似度,取得分最高的$k$个作为候选生成网络的筛选结果。图2显示了候选生成网络结构。
-
+
图2. 候选生成网络结构
@@ -73,7 +73,7 @@ $$P(\omega=i|u)=\frac{e^{v_{i}u}}{\sum_{j \in V}e^{v_{j}u}}$$
卷积神经网络主要由卷积(convolution)和池化(pooling)操作构成,其应用及组合方式灵活多变,种类繁多。本小结我们以如图3所示的网络进行讲解:
-
+
图3. 卷积神经网络文本分类模型
@@ -107,7 +107,7 @@ $$\hat c=max(c)$$
-
+
图4. 融合推荐模型
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.en.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.en.png
deleted file mode 100644
index c213608e769f69fb2cfe8597f8e696ee53730e3d..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.png
deleted file mode 100644
index 8aedb2204371e7691140ceffa5992f6080bbf097..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/Deep_candidate_generation_model_architecture.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.en.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.en.png
deleted file mode 100644
index 4298567ac5600173343299999965b20612e7affe..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.png
deleted file mode 100644
index a98e7cc67606b31e4c945f7eb907563e46dcef56..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/YouTube_Overview.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/output_32_0.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/output_32_0.png
deleted file mode 100644
index 7fd97b9cc3a0b9105b41591af4e8f8e4646bd681..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/output_32_0.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network.png
deleted file mode 100644
index 90c9b09fb78db98391ee199934f2d16efd6d6652..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network_en.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network_en.png
deleted file mode 100644
index 6fc8e11967000ec48c1c0a6fa3c2eaecb80cbb84..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/rec_regression_network_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn.png
deleted file mode 100644
index 61e63d9147cbc2901706ef80776d706e5368c3c5..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn_en.png b/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn_en.png
deleted file mode 100644
index fbcae2be81141be955076e877b94b0ea5d7e4d4a..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/recommender_system/image/text_cnn_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..667762d327cb160376a4119fa9df9db41b6443b2
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/.gitignore
@@ -0,0 +1,10 @@
+data/aclImdb
+data/imdb
+data/pre-imdb
+data/mosesdecoder-master
+*.log
+model_output
+dataprovider_copy_1.py
+model.list
+*.pyc
+.DS_Store
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
index 5844b6fe137c2401a04e47b5b489434ee9b363f1..8477cf32146c33947ced447c8bdd287a3e1e71f5 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
@@ -37,7 +37,7 @@
循环神经网络是一种能对序列数据进行精确建模的有力工具。实际上,循环神经网络的理论计算能力是图灵完备的\[[4](#参考文献)\]。自然语言是一种典型的序列数据(词序列),近年来,循环神经网络及其变体(如long short term memory\[[5](#参考文献)\]等)在自然语言处理的多个领域,如语言模型、句法解析、语义角色标注(或一般的序列标注)、语义表示、图文生成、对话、机器翻译等任务上均表现优异甚至成为目前效果最好的方法。
-
+
图1. 循环神经网络按时间展开的示意图
@@ -66,7 +66,7 @@ $$ h_t = o_t\odot tanh(c_t) $$
其中,$i_t, f_t, c_t, o_t$分别表示输入门,遗忘门,记忆单元及输出门的向量值,带角标的$W$及$b$为模型参数,$tanh$为双曲正切函数,$\odot$表示逐元素(elementwise)的乘法操作。输入门控制着新输入进入记忆单元$c$的强度,遗忘门控制着记忆单元维持上一时刻值的强度,输出门控制着输出记忆单元的强度。三种门的计算方式类似,但有着完全不同的参数,它们各自以不同的方式控制着记忆单元$c$,如图2所示:
-
+
图2. 时刻$t$的LSTM [7]
@@ -83,7 +83,7 @@ $$ h_t=Recrurent(x_t,h_{t-1})$$
如图3所示(以三层为例),奇数层LSTM正向,偶数层LSTM反向,高一层的LSTM使用低一层LSTM及之前所有层的信息作为输入,对最高层LSTM序列使用时间维度上的最大池化即可得到文本的定长向量表示(这一表示充分融合了文本的上下文信息,并且对文本进行了深层次抽象),最后我们将文本表示连接至softmax构建分类模型。
-
+
图3. 栈式双向LSTM用于文本分类
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
网络的输入`input_dim`表示的是词典的大小,`class_dim`表示类别数。这里,我们使用[`sequence_conv_pool`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py) API实现了卷积和池化操作。
+
+
### 栈式双向LSTM
栈式双向神经网络`stacked_lstm_net`的代码片段如下:
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm.png b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm.png
deleted file mode 100644
index 98fbea413a98a619004ca669c67f5f867fe974c9..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm_en.png b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm_en.png
deleted file mode 100644
index d73a00bf2c1fca2f9b8c26bccf5ea844fa1db50b..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/lstm_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/rnn.png b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/rnn.png
deleted file mode 100644
index 26c904102a6e6c4e30f0048b81373ae8c148b355..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/rnn.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm.jpg b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm.jpg
deleted file mode 100644
index 6b2adf70f2b5112a2e82505da5cff9f5fd0c6298..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm.jpg and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm_en.png b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm_en.png
deleted file mode 100644
index 8b5dbd726178b5555c513294e7b10a81acc96ff5..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/image/stacked_lstm_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/.gitignore b/doc/fluid/new_docs/beginners_guide/basics/word2vec/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..a620e0279c310d213d4e6d8e99e666962c11e352
--- /dev/null
+++ b/doc/fluid/new_docs/beginners_guide/basics/word2vec/.gitignore
@@ -0,0 +1,3 @@
+data/train.list
+data/test.list
+data/simple-examples*
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
index d21c7ddcc501f863b5ce672123dbbc6c26528f15..904d99fe2ffc9ead69a86c9763568a5c098348d5 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
@@ -34,7 +34,7 @@ $$X = USV^T$$
本章中,当词向量训练好后,我们可以用数据可视化算法t-SNE\[[4](#参考文献)\]画出词语特征在二维上的投影(如下图所示)。从图中可以看出,语义相关的词语(如a, the, these; big, huge)在投影上距离很近,语意无关的词(如say, business; decision, japan)在投影上的距离很远。
-
+
图1. 词向量的二维投影
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
```
-以上结果可以通过运行`calculate_dis.py`, 加载字典里的单词和对应训练特征结果得到,我们将在[应用模型](#应用模型)中详细描述用法。
+以上结果可以通过运行`calculate_dis.py`, 加载字典里的单词和对应训练特征结果得到,我们将在[模型应用](#模型应用)中详细描述用法。
## 模型概览
@@ -90,7 +90,7 @@ $$\frac{1}{T}\sum_t f(w_t, w_{t-1}, ..., w_{t-n+1};\theta) + R(\theta)$$
其中$f(w_t, w_{t-1}, ..., w_{t-n+1})$表示根据历史n-1个词得到当前词$w_t$的条件概率,$R(\theta)$表示参数正则项。
-
+
图2. N-gram神经网络模型
@@ -122,7 +122,7 @@ $$\frac{1}{T}\sum_t f(w_t, w_{t-1}, ..., w_{t-n+1};\theta) + R(\theta)$$
CBOW模型通过一个词的上下文(各N个词)预测当前词。当N=2时,模型如下图所示:
-
+
图3. CBOW模型
@@ -137,7 +137,7 @@ $$context = \frac{x_{t-1} + x_{t-2} + x_{t+1} + x_{t+2}}{4}$$
CBOW的好处是对上下文词语的分布在词向量上进行了平滑,去掉了噪声,因此在小数据集上很有效。而Skip-gram的方法中,用一个词预测其上下文,得到了当前词上下文的很多样本,因此可用于更大的数据集。
-
+
图4. Skip-gram模型
@@ -189,12 +189,13 @@ dream that one day
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
+
## 编程实现
本配置的模型结构如下图所示:
-
+
图5. 模型配置中的N-gram神经网络模型
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
```
+
## 模型应用
在模型训练后,我们可以用它做一些预测。
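The CBOW model described earlier in this file averages the embeddings of the surrounding words ($context = (x_{t-1}+x_{t-2}+x_{t+1}+x_{t+2})/4$) and then classifies the center word with a softmax. A tiny NumPy sketch of that forward step, with a made-up vocabulary and randomly initialized embedding and output matrices (assumptions, not the tutorial's actual parameters):

```python
import numpy as np

vocab = ["<s>", "the", "cat", "sat", "on", "mat", "<e>"]
word_to_id = {w: i for i, w in enumerate(vocab)}

emb_dim = 8
rng = np.random.default_rng(0)
embeddings = rng.normal(size=(len(vocab), emb_dim))   # input word vectors
W_out = rng.normal(size=(emb_dim, len(vocab)))        # output (classification) weights

def cbow_predict(context_words):
    # Average the context word vectors, then softmax over the vocabulary.
    ctx = np.mean([embeddings[word_to_id[w]] for w in context_words], axis=0)
    logits = ctx @ W_out
    probs = np.exp(logits - logits.max())
    return probs / probs.sum()

# Predict the center word of "the cat ? on mat" from a window of two words on each side.
print(vocab[int(np.argmax(cbow_predict(["the", "cat", "on", "mat"])))])
```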
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/2d_similarity.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/2d_similarity.png
deleted file mode 100644
index 384f59919a2c8dedb198e97d51434616648932e1..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/2d_similarity.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow.png
deleted file mode 100644
index 76b7d4bc0f99372465bd9aa34721513d39ad0776..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow_en.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow_en.png
deleted file mode 100644
index d985c393e618e9b79df05e4ff0ae57ccc93744d0..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/cbow_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.en.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.en.png
deleted file mode 100644
index 2e16ab2f443732b8ef5404a8e7cd2457bc5eee23..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.png
deleted file mode 100644
index 2449dce6a86b43b1b997ff418ed0dba56848463f..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/ngram.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm.png
deleted file mode 100644
index 1e0b40a8f7aefdf46d42761305511f281c08e595..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm_en.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm_en.png
deleted file mode 100644
index 158bd64b8f8729dea67834a8d591d21bce8b8564..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/nnlm_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/sentence_emb.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/sentence_emb.png
deleted file mode 100644
index ce4a8bf4769183cbaff91793753d2350a3ce936c..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/sentence_emb.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram.png
deleted file mode 100644
index a3ab385845d3dc8b5c670bae91225bc8dd47a8bb..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram_en.png b/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram_en.png
deleted file mode 100644
index 3c36c6d1f66eb98ea78c0673965d02a4ee3aa288..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/basics/word2vec/image/skipgram_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/README.cn.md b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/README.cn.md
index 27d25b43961ce74d73e391b735369501fb80a231..9574dbea2f9a39bb196b61bb4fd12ba7c378f75a 100644
--- a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/README.cn.md
@@ -15,7 +15,7 @@ $$y_i = \omega_1x_{i1} + \omega_2x_{i2} + \ldots + \omega_dx_{id} + b, i=1,\ldo
## 效果展示
我们使用从[UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing)获得的波士顿房价数据集进行模型的训练和预测。下面的散点图展示了使用模型对部分房屋价格进行的预测。其中,每个点的横坐标表示同一类房屋真实价格的中位数,纵坐标表示线性回归模型根据特征预测的结果,当二者值完全相等的时候就会落在虚线上。所以模型预测得越准确,则点离虚线越近。
-
+
图1. 预测值 V.S. 真实值
@@ -40,13 +40,9 @@ $$MSE=\frac{1}{n}\sum_{i=1}^{n}{(\hat{Y_i}-Y_i)}^2$$
### 训练过程
定义好模型结构之后,我们要通过以下几个步骤进行模型训练
-
1. 初始化参数,其中包括权重$\omega_i$和偏置$b$,对其进行初始化(如0均值,1方差)。
-
2. 网络正向传播计算网络输出和损失函数。
-
3. 根据损失函数进行反向误差传播 ([backpropagation](https://en.wikipedia.org/wiki/Backpropagation)),将网络误差从输出层依次向前传递, 并更新网络中的参数。
-
4. 重复2~3步骤,直至网络训练误差达到规定的程度或训练轮次达到设定值。
## 数据集
@@ -84,7 +80,7 @@ $$MSE=\frac{1}{n}\sum_{i=1}^{n}{(\hat{Y_i}-Y_i)}^2$$
- 很多的机器学习技巧/模型(例如L1,L2正则项,向量空间模型-Vector Space Model)都基于这样的假设:所有的属性取值都差不多是以0为均值且取值范围相近的。
-
+
图2. 各维属性的取值范围
@@ -199,10 +195,12 @@ step = 0
def event_handler_plot(event):
global step
if isinstance(event, fluid.EndStepEvent):
- if event.step % 10 == 0: # record the test cost every 10 seconds
+ if step % 10 == 0: # record a train cost every 10 batches
+ plot_cost.append(train_title, step, event.metrics[0])
+
+ if step % 100 == 0: # record a test cost every 100 batches
test_metrics = trainer.test(
reader=test_reader, feed_order=feed_order)
-
plot_cost.append(test_title, step, test_metrics[0])
plot_cost.plot()
@@ -210,12 +208,13 @@ def event_handler_plot(event):
# If the accuracy is good enough, we can stop the training.
print('loss is less than 10.0, stop')
trainer.stop()
-
- # We can save the trained parameters for the inferences later
- if params_dirname is not None:
- trainer.save_params(params_dirname)
-
step += 1
+
+ if isinstance(event, fluid.EndEpochEvent):
+ if event.epoch % 10 == 0:
+ # We can save the trained parameters for the inferences later
+ if params_dirname is not None:
+ trainer.save_params(params_dirname)
```
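The rewritten `event_handler_plot` above records a training cost every 10 batches, a test cost every 100 batches, and now saves parameters only at every tenth epoch instead of on every step. A standalone sketch of that cadence, with hypothetical stand-ins for the Fluid trainer and the plotting helper (print calls instead of `Ploter`), just to show when each branch fires:

```python
class FakeTrainer:
    """Stand-in for fluid.Trainer; only the methods used by the callback."""
    def test(self, **kwargs):
        return [0.5]                      # pretend test cost
    def save_params(self, dirname):
        print("saved params to", dirname)

step = 0
trainer = FakeTrainer()

def on_step_end(metrics, params_dir="params_dir"):
    global step
    if step % 10 == 0:                    # record a train cost every 10 batches
        print("train cost at step", step, "=", metrics[0])
    if step % 100 == 0:                   # record a test cost every 100 batches
        print("test cost at step", step, "=", trainer.test()[0])
    step += 1

def on_epoch_end(epoch, params_dir="params_dir"):
    if epoch % 10 == 0 and params_dir is not None:
        trainer.save_params(params_dir)   # save only every tenth epoch

for epoch in range(2):
    for _ in range(5):
        on_step_end([1.0])
    on_epoch_end(epoch)
```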
### 开始训练
@@ -231,11 +230,10 @@ trainer.train(
event_handler=event_handler_plot,
feed_order=feed_order)
```
-
-
-
- 图3. 训练结果
-
+
+
+图3. 训练结果
+
## 预测
@@ -262,18 +260,18 @@ inferencer = fluid.Inferencer(
batch_size = 10
test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size)
test_data = test_reader().next()
-test_feat = numpy.array([data[0] for data in test_data]).astype("float32")
-test_label = numpy.array([data[1] for data in test_data]).astype("float32")
+test_x = numpy.array([data[0] for data in test_data]).astype("float32")
+test_y = numpy.array([data[1] for data in test_data]).astype("float32")
-results = inferencer.infer({'x': test_feat})
+results = inferencer.infer({'x': test_x})
print("infer results: (House Price)")
-for k in range(0, batch_size-1):
- print("%d. %f" % (k, results[0][k]))
+for idx, val in enumerate(results[0]):
+ print("%d: %.2f" % (idx, val))
print("\nground truth:")
-for k in range(0, batch_size-1):
- print("%d. %f" % (k, test_label[k]))
+for idx, val in enumerate(test_y):
+ print("%d: %.2f" % (idx, val))
```
## 总结
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions.png b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions.png
deleted file mode 100644
index 27e4acb1313794f52ad9ad9e874cdadd197ff41f..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions_en.png
deleted file mode 100644
index f111c7cd766b7e9981513cc8c65be87dbbf3a79e..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/predictions_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges.png b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges.png
deleted file mode 100644
index 5325df4800985983e17476f007658d1cdb170b1c..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges_en.png
deleted file mode 100644
index 6d6a079bfdcc33617f6cf36612b271b48be6304f..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/ranges_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/train_and_test1.png b/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/train_and_test1.png
deleted file mode 100644
index bcd304a6a0baf30ecfbc43e08fc0aca179d05958..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/fit_a_line/image/train_and_test1.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
index 3289116991cb8ebaa4a6fb78e100ce16f633d69c..ac36c4ecf6b9b716fe5f0dbe2346e64918c22242 100644
--- a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
@@ -6,8 +6,8 @@
当我们学习编程的时候,编写的第一个程序一般是实现打印"Hello World"。而机器学习(或深度学习)的入门教程,一般都是 [MNIST](http://yann.lecun.com/exdb/mnist/) 数据库上的手写识别问题。原因是手写识别属于典型的图像分类问题,比较简单,同时MNIST数据集也很完备。MNIST数据集作为一个简单的计算机视觉数据集,包含一系列如图1所示的手写数字图片和对应的标签。图片是28x28的像素矩阵,标签则对应着0~9的10个数字。每张图片都经过了大小归一化和居中处理。
- 图1. MNIST图片示例
+图1. MNIST图片示例
MNIST数据集是从 [NIST](https://www.nist.gov/srd/nist-special-database-19) 的Special Database 3(SD-3)和Special Database 1(SD-1)构建而来。由于SD-3是由美国人口调查局的员工进行标注,SD-1是由美国高中生进行标注,因此SD-3比SD-1更干净也更容易识别。Yann LeCun等人从SD-1和SD-3中各取一半作为MNIST的训练集(60000条数据)和测试集(10000条数据),其中训练集来自250位不同的标注员,此外还保证了训练集和测试集的标注员是不完全相同的。
@@ -40,12 +40,12 @@ $$ y_i = \text{softmax}(\sum_j W_{i,j}x_j + b_i) $$
在分类问题中,我们一般采用交叉熵代价损失函数(cross entropy loss),公式如下:
-$$ L_{cross-entropy} (label, y) = -\sum_i label_ilog(y_i) $$
+$$ L_{cross-entropy}(label, y) = -\sum_i label_ilog(y_i) $$
图2为softmax回归的网络图,图中权重用蓝线表示、偏置用红线表示、+1代表偏置参数的系数为1。
图2. softmax回归网络结构图
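作为参考,下面用 numpy 演示一次 softmax 与交叉熵损失的计算(数值为随意选取的示例):

```python
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())           # 减去最大值以避免数值溢出
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1])    # 网络输出层的原始得分
label = np.array([1.0, 0.0, 0.0])     # one-hot 形式的真实标签
y = softmax(logits)
cross_entropy = -np.sum(label * np.log(y))
print(cross_entropy)
```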
@@ -54,16 +54,14 @@ $$ L_{cross-entropy} (label, y) = -\sum_i label_ilog(y_i) $$
Softmax回归模型采用了最简单的两层神经网络,即只有输入层和输出层,因此其拟合能力有限。为了达到更好的识别效果,我们考虑在输入层和输出层中间加上若干个隐藏层\[[10](#参考文献)\]。
1. 经过第一个隐藏层,可以得到 $ H_1 = \phi(W_1X + b_1) $,其中$\phi$代表激活函数,常见的有sigmoid、tanh或ReLU等函数。
-
2. 经过第二个隐藏层,可以得到 $ H_2 = \phi(W_2H_1 + b_2) $。
-
3. 最后,再经过输出层,得到的$Y=\text{softmax}(W_3H_2 + b_3)$,即为最后的分类结果向量。
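下面用 numpy 给出上述前向计算的一个简化示意(权重随机初始化,激活函数以 ReLU 为例,仅用于说明计算流程):

```python
import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

X = np.random.rand(8, 784)                         # 8 张展开成 784 维向量的图片
W1, b1 = np.random.randn(784, 200) * 0.01, np.zeros(200)
W2, b2 = np.random.randn(200, 200) * 0.01, np.zeros(200)
W3, b3 = np.random.randn(200, 10) * 0.01, np.zeros(10)

H1 = relu(X.dot(W1) + b1)                          # 第一个隐藏层
H2 = relu(H1.dot(W2) + b2)                         # 第二个隐藏层
Y = softmax(H2.dot(W3) + b3)                       # 输出层,每行是 10 个类别的概率
```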
图3为多层感知器的网络结构图,图中权重用蓝线表示、偏置用红线表示、+1代表偏置参数的系数为1。
图3. 多层感知器网络结构图
@@ -72,7 +70,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
在多层感知器模型中,将图像展开成一维向量输入到网络中,忽略了图像的位置和结构信息,而卷积神经网络能够更好的利用图像的结构信息。[LeNet-5](http://yann.lecun.com/exdb/lenet/)是一个较简单的卷积神经网络。图4显示了其结构:输入的二维图像,先经过两次卷积层到池化层,再经过全连接层,最后使用softmax分类作为输出层。下面我们主要介绍卷积层和池化层。
图4. LeNet-5卷积神经网络结构
@@ -81,7 +79,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
卷积层是卷积神经网络的核心基石。在图像识别里我们提到的卷积是二维卷积,即离散二维滤波器(也称作卷积核)与二维图像做卷积操作,简单地讲是二维滤波器滑动到二维图像上所有位置,并在每个位置上与该像素点及其邻域像素点做内积。卷积操作被广泛应用于图像处理领域,不同卷积核可以提取不同的特征,例如边沿、线性、角等特征。在深层卷积神经网络中,通过卷积操作可以提取出图像从低级到复杂的特征。
图5. 卷积层图片
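为帮助理解,下面给出一个朴素的二维卷积示意实现(无 padding、步长为 1,卷积核为假设的边缘检测核,仅作演示):

```python
import numpy as np

def conv2d(image, kernel):
    """最朴素的二维卷积(严格说是互相关)实现:不做 padding,步长为 1。"""
    H, W = image.shape
    k = kernel.shape[0]
    out = np.zeros((H - k + 1, W - k + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(image[i:i + k, j:j + k] * kernel)
    return out

image = np.random.rand(28, 28)
edge_kernel = np.array([[1., 0., -1.],
                        [1., 0., -1.],
                        [1., 0., -1.]])    # 一个简单的竖直边缘检测核
feature_map = conv2d(image, edge_kernel)   # 输出大小为 26 x 26
```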
@@ -98,16 +96,15 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
#### 池化层
图6. 池化层图片
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对每个矩形框内的数取最大值作为输出,如图6所示。
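下面用 numpy 给出一个最大池化的简化示意(假设输入边长能被池化窗口整除,仅作演示):

```python
import numpy as np

def max_pool(feature_map, size=2):
    """用不重叠的 size x size 矩形框做最大池化。"""
    H, W = feature_map.shape
    return feature_map.reshape(H // size, size, W // size, size).max(axis=(1, 3))

feature_map = np.arange(16, dtype="float32").reshape(4, 4)
print(max_pool(feature_map))   # 输出 2 x 2,每个元素是对应 2x2 区域内的最大值
```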
-更详细的关于卷积神经网络的具体知识可以参考[斯坦福大学公开课]( http://cs231n.github.io/convolutional-networks/ )和[图像分类](https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md)教程。
-
-### 常见激活函数介绍
+关于卷积神经网络更详细的知识可以参考[斯坦福大学公开课](http://cs231n.github.io/convolutional-networks/)和[图像分类](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification)教程。
+### 常见激活函数介绍
- sigmoid激活函数: $ f(x) = sigmoid(x) = \frac{1}{1+e^{-x}} $
- tanh激活函数: $ f(x) = tanh(x) = \frac{e^x-e^{-x}}{e^x+e^{-x}} $
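这些激活函数都可以直接用 numpy 验证,例如(仅作演示):

```python
import numpy as np

x = np.linspace(-3.0, 3.0, 7)
sigmoid = 1.0 / (1.0 + np.exp(-x))     # sigmoid(x)
tanh = np.tanh(x)                      # 等价于 (e^x - e^-x) / (e^x + e^-x)
relu = np.maximum(x, 0.0)              # 常见的 ReLU 激活
```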
@@ -136,20 +133,18 @@ PaddlePaddle在API中提供了自动加载[MNIST](http://yann.lecun.com/exdb/mni
我们建议使用 Fluid API,因为它更容易上手。
下面是 Fluid API 的简要概述。
-
1. `inference_program`:指定如何从数据输入中获得预测的函数。
这是指定网络流的地方。
-2. `train_program`:指定如何从 `inference_program` 和`标签值`中获取 `loss` 的函数。
+1. `train_program`:指定如何从 `inference_program` 和`标签值`中获取 `loss` 的函数。
这是指定损失计算的地方。
-3. `optimizer_func`: “指定优化器配置的函数。优化器负责减少损失并驱动培训。Paddle 支持多种不同的优化器。
+1. `optimizer_func`: 指定优化器配置的函数。优化器负责减少损失并驱动训练。Paddle 支持多种不同的优化器。
-4. `Trainer`:PaddlePaddle Trainer 管理由 `train_program` 和 `optimizer` 指定的训练过程。
+1. `Trainer`:PaddlePaddle Trainer 管理由 `train_program` 和 `optimizer` 指定的训练过程。
通过 `event_handler` 回调函数,用户可以监控训练的进展。
-5. `Inferencer`:Fluid inferencer 加载 `inference_program` 和由 Trainer 训练的参数。
-
+1. `Inferencer`:Fluid inferencer 加载 `inference_program` 和由 Trainer 训练的参数。
然后,它可以推断数据和返回预测。
在这个演示中,我们将深入了解它们。
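为了对这几个概念的配合方式有一个整体印象,下面给出一个省略了数据读取和训练细节的骨架示意(以手写数字分类为例,具体用法以下文的完整示例为准):

```python
import paddle.fluid as fluid

def inference_program():                      # 1. 定义网络:由输入图片得到预测概率
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    return fluid.layers.fc(input=img, size=10, act='softmax')

def train_program():                          # 2. 由预测值与标签计算 loss
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = inference_program()
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
    acc = fluid.layers.accuracy(input=predict, label=label)
    return [avg_cost, acc]

def optimizer_program():                      # 3. 配置优化器
    return fluid.optimizer.Adam(learning_rate=0.001)

# 4. Trainer 管理训练过程;5. Inferencer 加载训练好的参数用于预测
trainer = fluid.Trainer(train_func=train_program,
                        place=fluid.CPUPlace(),
                        optimizer_func=optimizer_program)
```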
@@ -240,6 +235,7 @@ def train_program():
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]
+
```
#### Optimizer Function 配置
@@ -255,9 +251,9 @@ def optimizer_program():
下一步,我们开始训练过程。`paddle.dataset.mnist.train()`和`paddle.dataset.mnist.test()`分别做训练和测试数据集。这两个函数各自返回一个reader——PaddlePaddle中的reader是一个Python函数,每次调用的时候返回一个Python yield generator。
-下面`shuffle`是一个reader decorator,它接受一个reader A,返回另一个reader B 。reader B 每次读入`buffer_size`条训练数据到一个buffer里,然后随机打乱其顺序,并且逐条输出。
+下面`shuffle`是一个reader decorator,它接受一个reader A,返回另一个reader B。reader B 每次读入`buffer_size`条训练数据到一个buffer里,然后随机打乱其顺序,并且逐条输出。
-`batch`是一个特殊的decorator,它的输入是一个reader,输出是一个batched reader 。在PaddlePaddle里,一个reader每次yield一条训练数据,而一个batched reader每次yield一个minibatch。
+`batch`是一个特殊的decorator,它的输入是一个reader,输出是一个batched reader。在PaddlePaddle里,一个reader每次yield一条训练数据,而一个batched reader每次yield一个minibatch。
```python
train_reader = paddle.batch(
@@ -280,7 +276,6 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_program)
-
```
#### Event Handler 配置
@@ -315,11 +310,10 @@ def event_handler(event):
`event_handler_plot` 可以用来在训练过程中画图如下:
-图7. 训练结果
+图7. 训练结果
```python
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn.png
deleted file mode 100644
index 3f5cdaacdc6acce41c5c6c99649be46685cf9903..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_en.png
deleted file mode 100644
index bc1a9a4ccf81972dc0d69cf4c808a52218e14d61..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log.png
deleted file mode 100644
index 65bd17eacd41bbdbdb042bd1ba366eb53663b410..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log_en.png
deleted file mode 100644
index 77524754df906ab096e120bd657449f4565c3418..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/cnn_train_log_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/conv_layer.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/conv_layer.png
deleted file mode 100644
index c751892ba0be3ae803b5933c3f33487ecfb6fe7f..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/conv_layer.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/infer_3.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/infer_3.png
deleted file mode 100644
index 030cd60d3b4af9aecd4941204da4ad15f6e1189f..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/infer_3.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling.png
deleted file mode 100644
index 90b02fa2a735cfcc9efb2de90906325dedcb358c..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling_en.png
deleted file mode 100644
index c626723512b6ee02abd55e5bab65e7629d130522..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/max_pooling_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp.png
deleted file mode 100644
index 9f4d26cd8da32201d0a5e9c72d466301dd2b42a1..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_en.png
deleted file mode 100644
index 1fedea6a75abbf132cbbcf8ab10ce045997d697a..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log.png
deleted file mode 100644
index f5a478fdc24f29c17555a2f1451f3f5a079faed9..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log_en.png
deleted file mode 100644
index 7d5508a1eccfcea1925f438043ee93b57769bebf..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mlp_train_log_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mnist_example_image.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mnist_example_image.png
deleted file mode 100644
index 4edd7cabf8a2282f6392ac1421c7ca4afb288589..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/mnist_example_image.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression.png
deleted file mode 100644
index 40b98298288b9c406fce1cbca9c913753020a94d..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression_en.png
deleted file mode 100644
index 833d3c663c94dd2d57fd19686949ded37a91f541..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_regression_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log.png
deleted file mode 100644
index 47204941af7f22e68386a70a06ec4f122b83e262..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log_en.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log_en.png
deleted file mode 100644
index 6fa0a951d5262effb707e3e15af8cb900e5560b8..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/softmax_train_log_en.png and /dev/null differ
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/train_and_test2.png b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/train_and_test2.png
deleted file mode 100644
index 5cb87b450d0398bcfaec0e647c362052069797e7..0000000000000000000000000000000000000000
Binary files a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/image/train_and_test2.png and /dev/null differ
diff --git a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
index a2f30823a6fcd379f94e6e98d043b0d00681827f..84987ea5daee9abd0fe2fe71bdfde62ea3388ab5 100644
--- a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
+++ b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
pip install --upgrade dist/visualdl-*.whl
```
-如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/how_to_dev_frontend_en.md)
+如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/develop/how_to_dev_frontend_cn.md)
## SDK
diff --git a/doc/fluid/new_docs/advanced_usage/deploy/build_and_install_lib_cn.rst b/doc/fluid/new_docs/user_guides/howto/inference/build_and_install_lib_cn.rst
similarity index 100%
rename from doc/fluid/new_docs/advanced_usage/deploy/build_and_install_lib_cn.rst
rename to doc/fluid/new_docs/user_guides/howto/inference/build_and_install_lib_cn.rst
diff --git a/doc/fluid/new_docs/user_guides/howto/inference/index.rst b/doc/fluid/new_docs/user_guides/howto/inference/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..45e1a2883773b92ed47ef8d51417bbdcd060b4ec
--- /dev/null
+++ b/doc/fluid/new_docs/user_guides/howto/inference/index.rst
@@ -0,0 +1,11 @@
+############
+模型预测部署
+############
+
+PaddlePaddle Fluid 提供了 C++ API 来支持模型的部署上线。
+
+.. toctree::
+ :maxdepth: 2
+
+ build_and_install_lib_cn.rst
+ native_infer.rst
diff --git a/doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst b/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
similarity index 97%
rename from doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst
rename to doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
index aa9377c112856693cda72779bd399f2415d716f0..6d6f3035c0b5c985cd39d45df9f1bcce50dcefa0 100644
--- a/doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst
+++ b/doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
@@ -4,12 +4,13 @@ Paddle 预测 API
为了更简单方便的预测部署,Fluid 提供了一套高层 API
用来隐藏底层不同的优化实现。
-`预测库相关代码 `__
+`预测库相关代码 `_
包括
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
+
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
下面是一些 API 概念的介绍
@@ -95,7 +96,7 @@ engine
CHECK(predictor->Run(slots, &outputs));
// 获取 outputs ...
-编译时,联编 ``libpaddle_fluid.a/.so`` 即可。
+编译时,联编 ``libpaddle_fluid.a/.so`` 便可。
详细代码参考
------------
diff --git a/doc/fluid/new_docs/user_guides/howto/prepare_data/index.rst b/doc/fluid/new_docs/user_guides/howto/prepare_data/index.rst
index 56fa928029903f1e3bd3e8064c146797f01b2b85..cca3684b78518867eae95d82e1347b52427ddc81 100644
--- a/doc/fluid/new_docs/user_guides/howto/prepare_data/index.rst
+++ b/doc/fluid/new_docs/user_guides/howto/prepare_data/index.rst
@@ -38,7 +38,6 @@ PaddlePaddle Fluid支持两种传入数据的方式:
:maxdepth: 2
feeding_data
- use_recordio_reader
Python Reader
#############
diff --git a/doc/fluid/new_docs/user_guides/howto/prepare_data/use_recordio_reader.rst b/doc/fluid/new_docs/user_guides/howto/prepare_data/use_recordio_reader.rst
deleted file mode 100644
index dfda33f1b03516fe2c704f55d095955282b19109..0000000000000000000000000000000000000000
--- a/doc/fluid/new_docs/user_guides/howto/prepare_data/use_recordio_reader.rst
+++ /dev/null
@@ -1,167 +0,0 @@
-.. _user_guide_use_recordio_as_train_data:
-
-############################
-使用RecordIO文件作为训练数据
-############################
-
-相比于 :ref:`user_guide_use_numpy_array_as_train_data`,
-:ref:`user_guide_use_recordio_as_train_data` 的性能更好;
-但是用户需要先将训练数据集转换成RecordIO文件格式,再使用
-:code:`fluid.layers.open_files()` 层在神经网络配置中导入 RecordIO 文件。
-用户还可以使用 :code:`fluid.layers.double_buffer()` 加速数据从内存到显存的拷贝,
-使用 :code:`fluid.layers.Preprocessor` 工具进行数据增强。
-
-将训练数据转换成RecordIO文件格式
-################################
-
-:code:`fluid.recordio_writer` 中,每个记录都是一个
-:code:`vector`, 即一个支持序列信息的Tensor数组。这个数组包括训练所需
-的所有特征。例如对于图像分类来说,这个数组可以包含图片和分类标签。
-
-用户可以使用 :code:`fluid.recordio_writer.convert_reader_to_recordio_file()` 可以将
-:ref:`user_guide_reader` 转换成一个RecordIO文件。或者可以使用
-:code:`fluid.recordio_writer.convert_reader_to_recordio_files()` 将一个
-:ref:`user_guide_reader` 转换成多个RecordIO文件。
-
-具体使用方法为:
-
-.. code-block:: python
-
- import paddle.fluid as fluid
- import numpy
-
- def reader_creator():
- def __impl__():
- for i in range(1000):
- yield [
- numpy.random.random(size=[3,224,224], dtype="float32"),
- numpy.random.random(size=[1], dtype="int64")
- ]
- return __impl__
-
- img = fluid.layers.data(name="image", shape=[3, 224, 224])
- label = fluid.layers.data(name="label", shape=[1], dtype="int64")
- feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
-
- BATCH_SIZE = 32
- reader = paddle.batch(reader_creator(), batch_size=BATCH_SIZE)
- fluid.recordio_writer.convert_reader_to_recordio_file(
- "train.recordio", feeder=feeder, reader_creator=reader)
-
-其中 :code:`reader_creator` 创建了一个 :code:`Reader`。
-:ref:`_api_fluid_data_feeder_DataFeeder`
-是将 :code:`Reader` 转换成 :code:`LoDTensor` 的工具。详细请参考
-:ref:`user_guide_reader` 。
-
-上述程序将 :code:`reader_creator` 的数据转换成了 :code:`train.recordio` 文件,
-其中每一个record 含有 32 条样本。如果batch size会在训练过程中调整,
-用户可以将每一个Record的样本数设置成1。并参考
-:ref:`user_guide_use_recordio_as_train_data_use_op_create_batch`。
-
-
-配置神经网络, 打开RecordIO文件
-##############################
-
-RecordIO文件转换好之后,用户可以使用 :code:`fluid.layers.open_files()`
-打开文件,并使用 :code:`fluid.layers.read_file` 读取文件内容。
-简单使用方法如下:
-
-.. code-block:: python
-
- import paddle.fluid as fluid
-
- file_obj = fluid.layers.open_files(
- filenames=["train.recordio"],
- shape=[[3, 224, 224], [1]],
- lod_levels=[0, 0],
- dtypes=["float32", "int64"],
- pass_num=100
- )
-
- image, label = fluid.layers.read_file(file_obj)
-
-其中如果设置了 :code:`pass_num` ,那么当所有数据读完后,会重新读取数据,
-直到读取了 :code:`pass_num` 遍。
-
-
-
-进阶使用
-########
-
-
-使用 :code:`fluid.layers.double_buffer()`
-------------------------------------------
-
-:code:`Double buffer` 使用双缓冲技术,将训练数据从内存中复制到显存中。配置双缓冲
-需要使用 :code:`fluid.layers.double_buffer()` 修饰文件对象。 例如:
-
-.. code-block:: python
-
- import paddle.fliud as fluid
- file_obj = fluid.layers.open_files(...)
- file_obj = fluid.layers.double_buffer(file_obj)
-
- image, label = fluid.layers.read_file(file_obj)
-
-双缓冲技术可以参考
-`Multiple buffering `_ 。
-
-配置数据增强
-------------
-
-使用 :code:`fluid.layers.Preprocessor` 可以配置文件的数据增强方法。例如
-
-.. code-block:: python
-
- import paddle.fluid as fluid
- file_obj = fluid.layers.open_files(...)
- preprocessor = fluid.layers.Preprocessor(reader=data_file)
- with preprocessor.block():
- image, label = preprocessor.inputs()
- image = image / 2
- label = label + 1
- preprocessor.outputs(image, label)
-
-如上代码所示,使用 :code:`Preprocessor` 定义了一个数据增强模块,并在
-:code:`with preprocessor.block()` 中定义了数据增强的具体操作。 用户通过配置
-:code:`preprocessor.inputs()` 获得数据文件中的各个字段。 并用
-:code:`preprocessor.outputs()` 标记预处理后的输出。
-
-.. _user_guide_use_recordio_as_train_data_use_op_create_batch:
-
-使用Op组batch
--------------
-
-使用 :code:`fluid.layers.batch()` 可以在训练的过程中动态的组batch。例如
-
-.. code-block:: python
-
- import paddle.fluid as fluid
- file_obj = fluid.layers.open_files(...)
- file_obj = fluid.layers.batch(file_obj, batch_size=32)
-
- img, label = fluid.layers.read_file(file_obj)
-
-需要注意的是,如果数据集中的最后几个样本不能组成 :code:`batch_size` 大小的批量数据,
-那么这几个样本直接组成一个批量数据进行训练。
-
-读入数据的shuffle
------------------
-
-使用 :code:`fluid.layers.shuffle()` 可以在训练过程中动态重排训练数据。例如
-
-.. code-block:: python
-
- import paddle.fluid as fluid
- file_obj = fluid.layers.open_files(...)
- file_obj = fliud.layers.shuffle(file_obj, buffer_size=8192)
-
- img, label = fliud.layers.read_file(file_obj)
-
-需要注意的是:
-
-1. :code:`shuffle` 实现方法是:
-先读入 :code:`buffer_size` 条样本,再随机的选出样本进行训练。
-
-2. :code:`shuffle` 中 :code:`buffer_size` 会占用训练内存,需要确定训练过程中内存
-足够支持缓存 :code:`buffer_size` 条数据。
diff --git a/doc/fluid/new_docs/user_guides/index.rst b/doc/fluid/new_docs/user_guides/index.rst
index 453cb71cfdf72e031ce0f0517e2db936eca38dfc..377631109d8f65c149b12cd2a0e4da920fdf4def 100644
--- a/doc/fluid/new_docs/user_guides/index.rst
+++ b/doc/fluid/new_docs/user_guides/index.rst
@@ -15,4 +15,5 @@
howto/training/index
howto/debug/index
howto/evaluation/index
+ howto/inference/index
models/index.rst
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index b6ae930b7155d15d24b287cc3eed50f2aeaa5599..70e5b97770a6c581c6a9c0145b03c42b83f14471 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -172,6 +172,7 @@ paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'],
paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
+paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
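For context, a minimal usage sketch of the newly listed `sequence_enumerate` layer, based only on the ArgSpec above (argument names and defaults are taken from that entry; the exact output layout is described in the layer's API documentation):

```python
import paddle.fluid as fluid

# A sequence of word ids with one level of LoD information.
x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=1)
# Enumerate all length-2 sub-sequences of each input sequence, padding the
# tail with pad_value when a window runs past the end of the sequence.
enumerated = fluid.layers.sequence_enumerate(input=x, win_size=2, pad_value=0)
```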
@@ -311,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
-paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 200, 1))
+paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 4095, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
@@ -375,7 +376,7 @@ paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'l
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0))
+paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
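For reference, a short sketch of how the new `centered` argument of `RMSPropOptimizer` would be passed, using the defaults shown in the ArgSpec above (illustrative only):

```python
import paddle.fluid as fluid

optimizer = fluid.optimizer.RMSPropOptimizer(
    learning_rate=0.01,
    rho=0.95,
    epsilon=1e-6,
    momentum=0.0,
    centered=True)  # new flag: use the centered RMSProp variant
```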
diff --git a/paddle/fluid/framework/.gitignore b/paddle/fluid/framework/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..5132131e55e2feee8ae88b4c65ec102fbc9c5fe1
--- /dev/null
+++ b/paddle/fluid/framework/.gitignore
@@ -0,0 +1,2 @@
+.tensor_util.cu
+.data_type_transform.cu
\ No newline at end of file
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 0668ff43c8192f53ff7e05abaeb575e2b78b1de4..cc7938b2ac07f11ceb7f33a2e37380d1e2ed2072 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -1,3 +1,22 @@
+# Windows treats a symbolic file as a real file, which is different from Unix.
+# We create a hidden file and compile it instead of the original source file.
+function(windows_symbolic TARGET)
+ set(oneValueArgs "")
+ set(multiValueArgs SRCS DEPS)
+ cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+ foreach(src ${windows_symbolic_SRCS})
+ get_filename_component(src ${src} NAME_WE)
+ if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
+ message(FATAL " ${src}.cc and ${src}.cu must exist, and ${src}.cu must be a symbolic file.")
+ endif()
+ add_custom_command(OUTPUT .${src}.cu
+ COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
+ COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
+ COMMENT "create hidden file of ${src}.cu")
+ add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
+ endforeach()
+endfunction()
+
add_subdirectory(ir)
if (NOT WIN32)
add_subdirectory(details)
@@ -11,7 +30,13 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU)
- nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
+ if (WIN32)
+ windows_symbolic(tensor_util SRCS tensor_util.cu)
+ nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
+ add_dependencies(tensor tensor_util)
+ else()
+ nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
+ endif(WIN32)
else()
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context)
endif()
@@ -55,7 +80,13 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function)
if(WITH_GPU)
- nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor)
+ if (WIN32)
+ windows_symbolic(hidden_file SRCS data_type_transform.cu)
+ nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor)
+ add_dependencies(data_type_transform hidden_file)
+ else()
+ nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor)
+ endif(WIN32)
nv_test(data_type_transform_test SRCS data_type_transform_test.cc data_type_transform_test.cu DEPS data_type_transform)
else()
cc_library(data_type_transform SRCS data_type_transform.cc DEPS tensor)
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 0bfff745493d069e948e6d277ec2bbfb0673a70b..7a99169849debcbc57d6f197b36c5045b211f3ef 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -326,7 +326,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::ApplyImpl(
ir::Graph &result = *graph;
for (auto &node : nodes) {
- if (node->NodeType() == ir::Node::Type::kVariable && node->Var()) {
+ if (node->IsVar() && node->Var()) {
all_vars_.emplace(node->Name(), node->Var());
}
}
@@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
}
}
-bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
- const std::string &og,
- std::unordered_set *og_has_been_broadcast) const {
- bool is_pg_once =
- grad_names_.count(og) != 0 && og_has_been_broadcast->count(og) == 0;
- if (is_pg_once) {
- // Insert NCCL AllReduce Op
- og_has_been_broadcast->insert(og);
- }
- return is_pg_once;
-}
-
int MultiDevSSAGraphBuilder::GetOpDeviceID(const ir::Graph &graph,
ir::Node *node) const {
if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
@@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
return var;
}
-// Find the first occurence of `prev_op_name` and make current `op` depend
-// on it.
-void MultiDevSSAGraphBuilder::ConnectOp(ir::Graph *result, OpHandleBase *op,
- const std::string &prev_op_name) const {
- for (auto &prev_op : result->Get(kGraphOps)) {
- if (prev_op->Name() == prev_op_name) {
- auto *dep_var = new DummyVarHandle(result->CreateControlDepVar());
- prev_op->AddOutput(dep_var);
- result->Get(kGraphDepVars).emplace(dep_var);
- op->AddInput(dep_var);
- }
- }
-}
-
void MultiDevSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result,
ir::Node *node) const {
int op_dev_id = -1;
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h
index 7a6f238f9cf7af18cb10ea271e453fec1902c833..ac6d9c5a64cfde60f75c76dae0a30cc7d735e996 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
std::vector FindDistTrainRecvVars(
const std::vector &nodes) const;
- void ConnectOp(ir::Graph *result, OpHandleBase *op,
- const std::string &prev_op_name) const;
-
void CreateComputationalOps(ir::Graph *result, ir::Node *node,
size_t num_places) const;
@@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void CreateComputationalOp(ir::Graph *result, ir::Node *node,
int dev_id) const;
- bool IsParameterGradientOnce(
- const std::string &og,
- std::unordered_set *og_has_been_broadcast) const;
-
int GetOpDeviceID(const ir::Graph &graph, ir::Node *node) const;
void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index bfc649017f19d67660bd11d590134cf56772bb27..f5235f70ad79616801110644999d511eeda33a32 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -1,20 +1,35 @@
+set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
+file(WRITE ${pass_file} "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n")
+file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
+function(pass_library TARGET)
+ set(options "")
+ set(oneValueArgs "")
+ set(multiValueArgs SRCS DEPS)
+ cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+ cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass)
+ file(APPEND ${pass_file} "USE_PASS(${TARGET});\n")
+ set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE)
+endfunction()
+
cc_library(node SRCS node.cc DEPS proto_desc)
cc_library(graph SRCS graph.cc DEPS node)
cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
-cc_library(graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper)
-cc_library(graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper)
cc_library(graph_traits SRCS graph_traits.cc DEPS graph)
cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits)
-cc_library(fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(attention_lstm_fuse_pass SRCS attention_lstm_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass)
-cc_library(fc_lstm_fuse_pass SRCS fc_lstm_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(seq_concat_fc_fuse_pass SRCS seq_concat_fc_fuse_pass.cc DEPS graph graph_pattern_detector)
+
+pass_library(graph_to_program_pass)
+pass_library(graph_viz_pass)
+pass_library(fc_fuse_pass)
+pass_library(attention_lstm_fuse_pass)
+pass_library(infer_clean_graph_pass)
+pass_library(fc_lstm_fuse_pass)
+pass_library(seq_concat_fc_fuse_pass)
+set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
-cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass graph_pattern_detector graph pass graph_traits framework_proto)
+cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
index d2d051a69a33a38535e67227d4cc62f5b35e430c..bb52d7e498e55c02ddc2cd6d07ccccd51ce4edc5 100644
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -13,10 +13,10 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
+#include
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/inference/api/helper.h"
namespace paddle {
namespace framework {
@@ -96,17 +96,13 @@ void FindWhileOp(Graph* graph) {
auto* cell_init = graph->RetriveNode(6);
auto* hidden_init = graph->RetriveNode(8);
-#define LINK_TO(node0, node1) \
- node0->outputs.push_back(node1); \
- node1->inputs.push_back(node0);
-
auto* lstm_op = graph->CreateOpNode(&op_desc);
PrepareParameters(graph, param);
- LINK_TO(X, lstm_op);
- LINK_TO(cell_init, lstm_op);
- LINK_TO(hidden_init, lstm_op);
- LINK_TO(lstm_op, LSTMOUT);
+ IR_NODE_LINK_TO(X, lstm_op);
+ IR_NODE_LINK_TO(cell_init, lstm_op);
+ IR_NODE_LINK_TO(hidden_init, lstm_op);
+ IR_NODE_LINK_TO(lstm_op, LSTMOUT);
GraphSafeRemoveNodes(graph, marked_nodes);
}
@@ -216,11 +212,11 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
float* out_data = out->mutable_data(platform::CPUPlace());
std::array tensors(
- {W_forget_w0.data(), W_input_w0.data(),
- W_output_w0.data(), W_cell_w0.data()});
+ {{W_forget_w0.data(), W_input_w0.data(),
+ W_output_w0.data(), W_cell_w0.data()}});
std::array tensors1(
- {W_forget_w1.data(), W_input_w1.data(),
- W_output_w1.data(), W_cell_w1.data()});
+ {{W_forget_w1.data(), W_input_w1.data(),
+ W_output_w1.data(), W_cell_w1.data()}});
for (int row = 0; row < D; row++) {
for (int col = 0; col < 4; col++) {
@@ -243,8 +239,8 @@ void PrepareLSTMBias(const LoDTensor& B_forget, const LoDTensor& B_input,
const LoDTensor& B_output, const LoDTensor& B_cell,
LoDTensor* out) {
std::array tensors(
- {B_forget.data(), B_input.data(), B_output.data(),
- B_cell.data()});
+ {{B_forget.data(), B_input.data(), B_output.data(),
+ B_cell.data()}});
PADDLE_ENFORCE_EQ(B_forget.dims().size(), 1);
int D = B_forget.dims()[0];
diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc
index 513742bab69d465aac1bfb7bcef2fe89108c14a0..5a4ebd6f3de555acccd72c61bd377ffd8ce69780 100644
--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -21,74 +21,26 @@ namespace paddle {
namespace framework {
namespace ir {
-bool VarOutLinksToOp(Node* node, const std::string& op_type) {
- for (auto* out : node->outputs) {
- if (out->IsOp() && out->Op()->Type() == op_type) {
- return true;
- }
- }
- return false;
-}
-
-void BuildFCPattern(PDPattern* pattern) {
- // Create Operators
- auto* mul_op = pattern->NewNode("mul")->assert_is_op("mul");
- auto* elementwise_add_op =
- pattern->NewNode("elementwise_add")->assert_is_op("elementwise_add");
- // Create variables
- // w
- auto* mul_weight_var = pattern->NewNode("mul_weight")
- ->AsInput()
- ->assert_is_op_nth_input("mul", "Y", 0);
- // x
- auto* mul_tmp_var = pattern->NewNode("mul_tmp_var")
- ->AsInput()
- ->assert_is_op_nth_input("mul", "X", 0);
- // intermediate variable, will be removed in the IR after fuse.
- auto* mul_out_var = pattern->NewNode("mul_out")
- ->AsIntermediate()
- ->assert_is_only_output_of_op("mul")
- ->assert_is_op_input("elementwise_add");
- // bias
- auto* elementwise_add_tmp_var = pattern->NewNode("elementwise_add_tmpvar")
- ->assert_is_op_input("elementwise_add")
- ->AsInput();
- // output
- auto* elementwise_add_out_var = pattern->NewNode("elementwise_add_out")
- ->AsOutput()
- ->assert_is_op_output("elementwise_add");
-
- mul_op->LinksFrom({mul_weight_var, mul_tmp_var}).LinksTo({mul_out_var});
- elementwise_add_op->LinksFrom({mul_out_var, elementwise_add_tmp_var})
- .LinksTo({elementwise_add_out_var});
-}
-
-// Replace the node `from` in the links to `to`
-bool LinksReplace(std::vector* links, Node* from, Node* to) {
- for (auto*& n : *links) {
- if (n == from) {
- n = to;
- return true;
- }
- }
- return false;
-}
-
std::unique_ptr FCFusePass::ApplyImpl(
std::unique_ptr graph) const {
PADDLE_ENFORCE(graph.get());
- FusePassBase::Init("fc", graph.get());
+ FusePassBase::Init("fc_fuse", graph.get());
std::unordered_set nodes2delete;
GraphPatternDetector gpd;
- BuildFCPattern(gpd.mutable_pattern());
-
-#define GET_NODE(id) \
- PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(#id)), \
- "pattern has no Node called %s", #id); \
- auto* id = subgraph.at(gpd.pattern().RetrieveNode(#id)); \
- PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
+ // BuildFCPattern(gpd.mutable_pattern());
+ auto* x = gpd.mutable_pattern()
+ ->NewNode("fc_fuse/x")
+ ->AsInput()
+ ->assert_is_op_input("mul", "X");
+ patterns::FC(gpd.mutable_pattern(), "fc_fuse", x, true /*with bias*/);
+
+#define GET_NODE(id) \
+ PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode("fc_fuse/" #id)), \
+ "pattern has no Node called %s", #id); \
+ auto* id = subgraph.at(gpd.pattern().RetrieveNode("fc_fuse/" #id)); \
+ PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", "fc_fuse/" #id);
int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
@@ -98,43 +50,33 @@ std::unique_ptr FCFusePass::ApplyImpl(
// scenerio.
// FC's fusion is simple, just op fuse, no need to process the
// parameters.
- GET_NODE(mul_tmp_var); // x
- GET_NODE(mul_weight); // Y
- GET_NODE(elementwise_add_tmpvar); // bias
- GET_NODE(elementwise_add_out); // Out
- GET_NODE(mul); // MUL op
- GET_NODE(elementwise_add); // ELEMENT_ADD op
- GET_NODE(mul_out); // tmp
+ GET_NODE(x); // x
+ GET_NODE(w); // Y
+ GET_NODE(fc_bias); // bias
+ GET_NODE(fc_out); // Out
+ GET_NODE(mul); // MUL op
+ GET_NODE(elementwise_add); // ELEMENT_ADD op
+ GET_NODE(mul_out); // tmp
#undef GET_NODE
// Create an FC Node.
OpDesc desc;
- std::string fc_x_in = mul_tmp_var->Name();
- std::string fc_Y_in = mul_weight->Name();
- std::string fc_bias_in = elementwise_add_tmpvar->Name();
- std::string fc_out = elementwise_add_out->Name();
+ std::string fc_x_in = x->Name();
+ std::string fc_Y_in = w->Name();
+ std::string fc_bias_in = fc_bias->Name();
+ std::string fc_out_out = fc_out->Name();
desc.SetInput("Input", std::vector({fc_x_in}));
desc.SetInput("W", std::vector({fc_Y_in}));
desc.SetInput("Bias", std::vector({fc_bias_in}));
- desc.SetOutput("Out", std::vector({fc_out}));
+ desc.SetOutput("Out", std::vector({fc_out_out}));
desc.SetType("fc");
auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied.
- fc_node->inputs =
- std::vector({mul_tmp_var, mul_weight, elementwise_add_tmpvar});
- fc_node->outputs.push_back(elementwise_add_out);
-
- // Update link relatons
- PADDLE_ENFORCE(LinksReplace(&mul_tmp_var->outputs, mul, fc_node));
- PADDLE_ENFORCE(LinksReplace(&mul_weight->outputs, mul, fc_node));
- PADDLE_ENFORCE(LinksReplace(&elementwise_add_tmpvar->outputs,
- elementwise_add, fc_node));
- PADDLE_ENFORCE(
- LinksReplace(&elementwise_add_out->inputs, elementwise_add, fc_node));
+ GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out});
- // Drop old nodes
- graph->RemoveNode(mul);
- graph->RemoveNode(elementwise_add);
- graph->RemoveNode(mul_out); // tmp variable
+ IR_NODE_LINK_TO(x, fc_node);
+ IR_NODE_LINK_TO(w, fc_node);
+ IR_NODE_LINK_TO(fc_bias, fc_node);
+ IR_NODE_LINK_TO(fc_node, fc_out);
found_fc_count++;
};
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
index 5852705b6b8d1c650faeae3dc810aac65353b459..0d69dfa79aa26940f8f56f84b35ffed34f29f703 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
@@ -11,39 +11,39 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-
#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
+#include
+#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
namespace framework {
namespace ir {
-std::unique_ptr FCLstmFusePass::ApplyImpl(
- std::unique_ptr graph) const {
- GraphPatternDetector gpd;
- auto* pattern = gpd.mutable_pattern();
-
- std::unordered_set fused_ops({// first lstm
- 13, 15, 16,
- // second lstm
- 23, 25, 26});
-
- pattern->NewNode([&](Node* x) { return fused_ops.count(x->id()); },
- "any_node");
+std::string GenNodeName(const std::string& prefix, const std::string& name) {
+ return prefix + "/" + name;
+}
- std::unordered_set marked_nodes;
+void BuildPattern(PDPattern* pattern, const std::string& name_scope,
+ bool with_fc_bias) {
+ PDNode* x = pattern->NewNode(name_scope, "x")
+ ->assert_is_op_input("mul")
+ ->assert_var_not_persistable();
+ auto* fc_out = patterns::FC(pattern, name_scope, x, with_fc_bias);
+ fc_out->AsIntermediate(); // fc_out is a tmp var, will be removed after fuse.
+ patterns::LSTM(pattern, name_scope, fc_out);
+ // LOG(INFO) << "\n" << pattern->DotString();
+}
- auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
- Graph* g) {
+int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
+ bool with_fc_bias) {
+ GraphPatternDetector gpd;
+ auto* pattern = gpd.mutable_pattern();
- auto* id = subgraph.at(gpd.pattern().RetrieveNode("any_node"));
- marked_nodes.insert(id);
- };
- gpd(graph.get(), handler);
+ BuildPattern(pattern, name_scope, with_fc_bias);
// Create New OpDesc
auto lstm_creator = [&](int lstm, int input, int weight_x, int weight_h,
- int bias, int hidden, int cell, int xx) {
+ int bias, int hidden, int cell, int xx, int fc_bias) {
#define GET_NODE(x) auto* x##_n = graph->RetriveNode(x);
GET_NODE(input);
GET_NODE(weight_x);
@@ -61,61 +61,147 @@ std::unique_ptr FCLstmFusePass::ApplyImpl(
SET_IN(WeightX, weight_x);
SET_IN(WeightH, weight_h);
SET_IN(Bias, bias);
-#undef GET_NODE
#undef SET_IN
+ if (with_fc_bias) {
+ // Add FC-bias with LSTM-bias and create a new weight
+ PADDLE_ENFORCE(scope);
+ const std::string& new_bias_var = name_scope + "_bias.new";
+ auto* bias_var = scope->Var(new_bias_var);
+ PADDLE_ENFORCE(bias_var);
+ auto* bias_tensor = bias_var->GetMutable();
+ auto* lstm_bias_var = scope->FindVar(bias_n->Name());
+ PADDLE_ENFORCE(lstm_bias_var);
+ const auto& lstm_bias_tensor = lstm_bias_var->Get();
+ bias_tensor->Resize(lstm_bias_tensor.dims());
+
+ GET_NODE(fc_bias);
+ auto* fc_bias_var = scope->FindVar(fc_bias_n->Name());
+ const auto& fc_bias_tensor = fc_bias_var->Get();
+
+ auto* data = bias_tensor->mutable_data(platform::CPUPlace());
+
+ for (int i = 0; i < bias_tensor->numel(); i++) {
+ data[i] =
+ fc_bias_tensor.data()[i] + lstm_bias_tensor.data()[i];
+ }
+ op_desc.SetInput("Bias", {new_bias_var});
+ }
+#undef GET_NODE
- VLOG(4) << "hidden_n: " << hidden_n->Name();
- VLOG(4) << "cell: " << cell_n->Name();
- VLOG(4) << "xx: " << xx_n->Name();
+ // Create temp variables.
+ scope->Var(name_scope + "/BatchedInput.new")
+ ->GetMutable();
+ scope->Var(name_scope + "/BatchCellPreAct.new")
+ ->GetMutable();
+ scope->Var(name_scope + "/BatchedGate.new")
+ ->GetMutable();
op_desc.SetInput("H0", {});
op_desc.SetInput("C0", {});
op_desc.SetOutput("Hidden", {hidden_n->Name()});
op_desc.SetOutput("Cell", {cell_n->Name()});
op_desc.SetOutput("XX", {xx_n->Name()});
- op_desc.SetOutput("BatchedGate", {"blstm_0.tmp_2"});
- op_desc.SetOutput("BatchCellPreAct", {"blstm_1.tmp_2"});
+ op_desc.SetOutput("BatchedGate", {name_scope + "/BatchedGate.new"});
+ op_desc.SetOutput("BatchCellPreAct", {name_scope + "/BatchCellPreAct.new"});
+ op_desc.SetOutput("BatchedInput", {name_scope + "/BatchedInput.new"});
op_desc.SetAttr("is_reverse", lstm_n->Op()->GetAttr("is_reverse"));
- op_desc.SetAttr("use_peepholes", false);
- auto* op = graph->CreateOpNode(&op_desc);
+ op_desc.SetAttr("use_peepholes", lstm_n->Op()->GetAttr("use_peepholes"));
+ // TODO(TJ): get from attr
+ op_desc.SetAttr("use_seq", true);
+
+#define TMP_NAME(x) "at.new.tmp." #x
+#define OP_SET_OUT(x) op_desc.SetOutput(#x, {TMP_NAME(x)})
+ OP_SET_OUT(BatchedCell);
+ OP_SET_OUT(BatchedHidden);
+ OP_SET_OUT(ReorderedH0);
+ OP_SET_OUT(ReorderedC0);
+#undef OP_SET_OUT
-#define LINK_TO(a, b) \
- a->outputs.push_back(b); \
- b->inputs.push_back(a);
- LINK_TO(input_n, op);
- LINK_TO(weight_x_n, op);
- LINK_TO(weight_h_n, op);
- LINK_TO(bias_n, op);
- LINK_TO(op, hidden_n);
-#undef LINK_TO
+ auto* op = graph->CreateOpNode(&op_desc);
+ PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
+ auto* scope = graph->Get(kParamScopeAttr);
+
+#define TMP_NEW(x) scope->Var(TMP_NAME(x))->GetMutable()
+ TMP_NEW(BatchedCell);
+ TMP_NEW(BatchedHidden);
+ TMP_NEW(ReorderedH0);
+ TMP_NEW(ReorderedC0);
+#undef TMP_NEW
+#undef TMP_NAME
+
+ IR_NODE_LINK_TO(input_n, op);
+ IR_NODE_LINK_TO(weight_x_n, op);
+ IR_NODE_LINK_TO(weight_h_n, op);
+ IR_NODE_LINK_TO(bias_n, op);
+ IR_NODE_LINK_TO(op, hidden_n);
return op;
+ };
+
+ int fusion_count{0};
+
+ auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+ Graph* g) {
+#define GET_NODE(name__) \
+ std::string name__##key = name_scope + "/" + #name__; \
+ auto* name__##n = pattern->RetrieveNode(name__##key); \
+ PADDLE_ENFORCE(name__##n); \
+ PADDLE_ENFORCE(subgraph.count(name__##n)); \
+ Node* name__##_n = subgraph.at(name__##n); \
+ int name__ __attribute__((unused)) = name__##_n->id();
+
+ GET_NODE(x);
+ GET_NODE(w);
+ GET_NODE(mul);
+ GET_NODE(fc_out);
+ GET_NODE(Weight);
+ GET_NODE(lstm);
+ GET_NODE(Bias);
+ GET_NODE(Hidden);
+ GET_NODE(Cell);
+
+ if (with_fc_bias) {
+ GET_NODE(fc_bias);
+ GET_NODE(elementwise_add);
+ lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, fc_bias);
+ // Remove unneeded nodes.
+ std::unordered_set marked_nodes(
+ {mul_n, lstm_n, elementwise_add_n});
+ GraphSafeRemoveNodes(graph, marked_nodes);
+ } else {
+ lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, -1);
+ // Remove unneeded nodes.
+ std::unordered_set marked_nodes({mul_n, lstm_n});
+ GraphSafeRemoveNodes(graph, marked_nodes);
+ }
+#undef GET_NODE
+ ++fusion_count;
};
- lstm_creator(16, 12, 14, 18, 17, 22, 21, 19);
- lstm_creator(26, 12, 24, 28, 27, 32, 31, 29);
+ gpd(graph, handler);
- // remove all the nodes
+ return fusion_count;
+}
- for (auto* node : marked_nodes) {
- graph->RemoveNode(const_cast(node));
- }
+std::unique_ptr MulLstmFusePass::ApplyImpl(
+ std::unique_ptr graph) const {
+ FusePassBase::Init(name_scope_, graph.get());
- for (auto* node : graph->Nodes()) {
- for (auto it = node->inputs.begin(); it != node->inputs.end();) {
- if (marked_nodes.count(*it)) {
- it = const_cast(node)->inputs.erase(it);
- } else
- it++;
- }
- for (auto it = node->outputs.begin(); it != node->outputs.end();) {
- if (marked_nodes.count(*it)) {
- it = const_cast(node)->outputs.erase(it);
- } else
- it++;
- }
- }
+ int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(),
+ false /*with_fc_bias*/);
+
+ AddStatis(fusion_count);
+ return graph;
+}
+
+std::unique_ptr FCLstmFusePass::ApplyImpl(
+ std::unique_ptr graph) const {
+ FusePassBase::Init(name_scope_, graph.get());
+
+ int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(),
+ true /*with_fc_bias*/);
+ AddStatis(fusion_count);
return graph;
}
@@ -123,4 +209,5 @@ std::unique_ptr FCLstmFusePass::ApplyImpl(
} // namespace framework
} // namespace paddle
+REGISTER_PASS(mul_lstm_fuse_pass, paddle::framework::ir::MulLstmFusePass);
REGISTER_PASS(fc_lstm_fuse_pass, paddle::framework::ir::FCLstmFusePass);
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
index 74b08ae558b12c9328db58687cd01edbc37291a8..3ee32c63a46fcc34bdccd1e14d4bbaf9668c49e9 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
@@ -12,20 +12,36 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#pragma once
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
-class FCLstmFusePass : public Pass {
+// The FCLstmFusePass and MulLstmFusePass both fuse to the same FusionLstm op.
+
+// Just FC without bias
+class FCLstmFusePass : public FusePassBase {
public:
virtual ~FCLstmFusePass() {}
protected:
std::unique_ptr ApplyImpl(std::unique_ptr graph) const;
+
+ const std::string name_scope_{"fc_lstm_fuse"};
+};
+
+class MulLstmFusePass : public FusePassBase {
+ public:
+ virtual ~MulLstmFusePass() {}
+
+ protected:
+ std::unique_ptr ApplyImpl(std::unique_ptr graph) const;
+ const std::string name_scope_{"fc_nobias_lstm_fuse"};
};
} // namespace ir
diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h
index 55e495a0ed75c3a09703438dcfe01ca8f9d36118..ae8496204d4aeb88c04154d571325d440274e821 100644
--- a/paddle/fluid/framework/ir/graph.h
+++ b/paddle/fluid/framework/ir/graph.h
@@ -167,7 +167,6 @@ class Graph {
std::map> attr_dels_;
std::map> nodes_;
std::unordered_set node_set_;
- int node_count_{0};
};
bool IsControlDepVar(const ir::Node &var);
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 945ab110b148c320b6626cadaa47d483df68419e..731b89423354532f684e19305dfa87e8eb75d4b1 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -19,6 +19,7 @@
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
+#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
@@ -71,7 +72,10 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) {
void GraphPatternDetector::operator()(Graph* graph,
GraphPatternDetector::handle_t handler) {
- if (!MarkPDNodesInGraph(*graph)) return;
+ if (!MarkPDNodesInGraph(*graph)) {
+ return;
+ }
+
auto subgraphs = DetectPatterns();
UniquePatterns(&subgraphs);
RemoveOverlappedMatch(&subgraphs);
@@ -81,13 +85,13 @@ void GraphPatternDetector::operator()(Graph* graph,
LOG(INFO) << "detect " << subgraphs.size() << " subgraph matches the pattern";
int id = 0;
for (auto& g : subgraphs) {
- LOG(INFO) << "optimizing #" << id++ << " subgraph";
+ VLOG(3) << "optimizing #" << id++ << " subgraph";
handler(g, graph);
}
}
bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
- VLOG(4) << "mark pdnodes in graph";
+ VLOG(3) << "mark pdnodes in graph";
if (graph.Nodes().empty()) return false;
for (auto& node : GraphTraits::DFS(graph)) {
@@ -106,7 +110,13 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
return false;
}
}
+ for (auto& item : pdnodes2nodes_) {
+ for (auto& n : item.second) {
+ GetMarkedNodes(const_cast(&graph)).insert(n);
+ }
+ }
VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
+
return !pdnodes2nodes_.empty();
}
@@ -272,7 +282,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
for (const auto& subgraph : *subgraphs) {
bool valid = true;
for (auto& item : subgraph) {
- if (node_set.count(item.second)) {
+ if (item.first->IsIntermediate() && node_set.count(item.second)) {
valid = false;
break;
}
@@ -328,22 +338,22 @@ PDNode& PDNode::LinksFrom(const std::vector& others) {
}
PDNode* PDNode::assert_is_op() {
- asserts_.emplace_back([this](Node* x) { return x && x->IsOp(); });
+ asserts_.emplace_back([](Node* x) { return x && x->IsOp(); });
return this;
}
PDNode* PDNode::assert_is_op(const std::string& op_type) {
- asserts_.emplace_back([this, op_type](Node* x) {
+ asserts_.emplace_back([op_type](Node* x) {
return x && x->IsOp() && x->Op()->Type() == op_type;
});
return this;
}
PDNode* PDNode::assert_is_var() {
- asserts_.emplace_back([this](Node* x) { return x && x->IsVar(); });
+ asserts_.emplace_back([](Node* x) { return x && x->IsVar(); });
return this;
}
PDNode* PDNode::assert_var_not_persistable() {
assert_is_var();
- asserts_.emplace_back([this](Node* x) { return !x->Var()->Persistable(); });
+ asserts_.emplace_back([](Node* x) { return !x->Var()->Persistable(); });
return this;
}
PDNode* PDNode::assert_is_persistable_var() {
@@ -357,7 +367,9 @@ PDNode* PDNode::assert_is_op_nth_input(const std::string& op_type,
assert_is_op_input(op_type);
asserts_.emplace_back([=](Node* x) {
for (auto* op : x->outputs) {
- if (IsNthInput(x, op, argument, nth)) return true;
+ if (op->IsOp() && op->Op()->Type() == op_type &&
+ IsNthInput(x, op, argument, nth))
+ return true;
}
return false;
});
@@ -368,7 +380,9 @@ PDNode* PDNode::assert_is_op_nth_output(const std::string& op_type,
assert_is_var();
asserts_.emplace_back([=](Node* x) {
for (auto* op : x->inputs) {
- if (IsNthOutput(x, op, argument, nth)) return true;
+ if (op->IsOp() && op->Op()->Type() == op_type &&
+ IsNthOutput(x, op, argument, nth))
+ return true;
}
return false;
});
@@ -412,6 +426,12 @@ PDNode* PDNode::assert_is_op_output(const std::string& op_type) {
});
return this;
}
+PDNode* PDNode::assert_is_op_output(const std::string& op_type,
+ const std::string& argument) {
+ assert_is_var();
+ assert_is_op_nth_output(op_type, argument, 0);
+ return this;
+}
PDNode* PDNode::assert_is_op_input(const std::string& op_type) {
assert_is_var();
asserts_.emplace_back([=](Node* x) {
@@ -424,6 +444,12 @@ PDNode* PDNode::assert_is_op_input(const std::string& op_type) {
});
return this;
}
+PDNode* PDNode::assert_is_op_input(const std::string& op_type,
+ const std::string& argument) {
+ assert_is_var();
+ assert_is_op_nth_input(op_type, argument, 0);
+ return this;
+}
PDNode* PDNode::assert_op_has_n_inputs(const std::string& op_type, size_t n) {
assert_is_op(op_type);
asserts_.emplace_back([=](Node* x) { return x->inputs.size() == n; });
@@ -439,6 +465,130 @@ PDNode* PDNode::assert_more(PDNode::teller_t&& teller) {
return this;
}
+bool VarLinksToOp(Node* node, const std::string& op_type) {
+ for (auto* out : node->outputs) {
+ if (out->IsOp() && out->Op()->Type() == op_type) {
+ return true;
+ }
+ }
+ return false;
+}
+bool IsNthInput(Node* var, Node* op, const std::string& argument, size_t nth) {
+ PADDLE_ENFORCE(var->IsVar());
+ PADDLE_ENFORCE(op->IsOp());
+ if (op->Op()->Input(argument).size() <= nth) return false;
+ return var->Name() == op->Op()->Input(argument)[nth];
+}
+bool IsNthOutput(Node* var, Node* op, const std::string& argument, size_t nth) {
+ PADDLE_ENFORCE(var->IsVar());
+ PADDLE_ENFORCE(op->IsOp());
+ if (op->Op()->Output(argument).size() <= nth) return false;
+ return var->Name() == op->Op()->Output(argument)[nth];
+}
+void GraphSafeRemoveNodes(Graph* graph,
+ const std::unordered_set<const Node *>& nodes) {
+ for (auto* node : nodes) {
+ graph->RemoveNode(const_cast<Node *>(node));
+ }
+
+ for (auto* node : graph->Nodes()) {
+ for (auto it = node->inputs.begin(); it != node->inputs.end();) {
+ if (nodes.count(*it)) {
+ it = const_cast<Node *>(node)->inputs.erase(it);
+ } else {
+ it++;
+ }
+ }
+ for (auto it = node->outputs.begin(); it != node->outputs.end();) {
+ if (nodes.count(*it)) {
+ it = const_cast<Node *>(node)->outputs.erase(it);
+ } else {
+ it++;
+ }
+ }
+ }
+}
+bool VarLinksFromOp(Node* node, const std::string& op_type) {
+ for (auto* out : node->inputs) {
+ if (out->IsOp() && out->Op()->Type() == op_type) {
+ return true;
+ }
+ }
+ return false;
+}
+
+PDNode* patterns::FC(PDPattern* pattern, const std::string& name_scope,
+ PDNode* x, bool with_bias) {
+ // Create Operators
+ PDNode* elementwise_add_op{nullptr};
+ auto* mul_op = pattern->NewNode(name_scope, "mul")->assert_is_op("mul");
+ if (with_bias) {
+ elementwise_add_op = pattern->NewNode(name_scope, "elementwise_add")
+ ->assert_is_op("elementwise_add");
+ }
+ // Create variables
+ // w
+ auto* mul_weight_var = pattern->NewNode(name_scope, "w")
+ ->AsInput()
+ ->assert_is_persistable_var()
+ ->assert_is_op_nth_input("mul", "Y", 0);
+ PDNode* mul_out_var{nullptr};
+ if (with_bias) {
+ // Intermediate variable; it will be removed from the IR after the fuse.
+ mul_out_var = pattern->NewNode(name_scope, "mul_out")
+ ->AsIntermediate()
+ ->assert_is_only_output_of_op("mul")
+ ->assert_is_op_input("elementwise_add");
+ }
+ PDNode *bias{nullptr}, *fc_out{nullptr};
+ if (with_bias) {
+ // bias
+ bias = pattern->NewNode(name_scope, "fc_bias")
+ ->assert_is_op_input("elementwise_add")
+ ->AsInput();
+ // output
+ fc_out = pattern->NewNode(name_scope, "fc_out")
+ ->AsOutput()
+ ->assert_is_op_output("elementwise_add");
+ } else {
+ fc_out = pattern->NewNode(name_scope, "fc_out")
+ ->AsOutput()
+ ->assert_is_op_output("mul");
+ }
+
+ if (with_bias) {
+ mul_op->LinksFrom({mul_weight_var, x}).LinksTo({mul_out_var});
+ elementwise_add_op->LinksFrom({mul_out_var, bias}).LinksTo({fc_out});
+ } else {
+ mul_op->LinksFrom({mul_weight_var, x}).LinksTo({fc_out});
+ }
+
+ return fc_out;
+}
+PDNode* patterns::LSTM(PDPattern* pattern, const std::string& name_scope,
+ PDNode* x) {
+ x->assert_is_op_input("lstm", "Input");
+ auto* lstm_op = pattern->NewNode(name_scope, "lstm")->assert_is_op("lstm");
+#define NEW_NODE(arg__, io__) \
+ auto* arg__ = pattern->NewNode(name_scope, #arg__) \
+ ->assert_is_op_##io__("lstm", #arg__);
+
+ // Currently, the H0 and C0 are optional
+ // TODO(Superjomn) upgrade the fuse framework to support optional.
+ // NEW_NODE(H0, input);
+ // NEW_NODE(C0, input);
+ NEW_NODE(Weight, input);
+ NEW_NODE(Bias, input);
+
+ NEW_NODE(Hidden, output);
+ NEW_NODE(Cell, output);
+ NEW_NODE(BatchGate, output);
+ NEW_NODE(BatchCellPreAct, output);
+
+ lstm_op->LinksFrom({x, Weight, Bias});
+ lstm_op->LinksTo({Hidden, Cell, BatchGate, BatchCellPreAct});
+ return Hidden;
+}
} // namespace ir
} // namespace framework
} // namespace paddle
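
Note on the detector API touched above: a pass builds a PDPattern, then calls the detector with a handler that receives each match as a PDNode-to-Node mapping. A minimal sketch, assuming a Graph* named graph is in scope and that subgraph_t maps PDNode* to the matched Node*; the node id "demo/mul" and the handler body are illustrative, not part of this patch:

    GraphPatternDetector detector;
    auto* mul = detector.mutable_pattern()
                    ->NewNode("demo/mul")      // hypothetical node id
                    ->assert_is_op("mul")
                    ->AsIntermediate();        // allowed to be erased by the handler
    detector(graph, [&](const GraphPatternDetector::subgraph_t& subgraph,
                        Graph* g) {
      Node* matched = subgraph.at(mul);        // the concrete "mul" op node
      GraphSafeRemoveNodes(g, {matched});      // dangling edges are cleaned up
    });
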
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index f8488c84962d1caa6e7817b3c0349d6da3a59182..eacea1750f6f1e86a8fe79637c3bd757a7275398 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -19,6 +19,9 @@
#endif
#include
+#include
+#include
+#include
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/inference/analysis/dot.h"
@@ -95,7 +98,11 @@ struct PDNode {
PDNode* assert_var_not_persistable();
PDNode* assert_is_persistable_var();
PDNode* assert_is_op_output(const std::string& op_type);
+ PDNode* assert_is_op_output(const std::string& op_type,
+ const std::string& argument);
PDNode* assert_is_op_input(const std::string& op_type);
+ PDNode* assert_is_op_input(const std::string& op_type,
+ const std::string& argument);
PDNode* assert_is_op_nth_input(const std::string& op_type,
const std::string& argument, int nth);
PDNode* assert_is_op_nth_output(const std::string& op_type,
@@ -167,6 +174,9 @@ class PDPattern {
PDNode* NewNode(PDNode::teller_t&& teller, const std::string& name = NewID());
PDNode* NewNode(const std::string& name = NewID());
+ PDNode* NewNode(const std::string& prefix, const std::string& name) {
+ return NewNode(prefix + "/" + name);
+ }
PDNode* RetrieveNode(const std::string& id) const;
const std::vector<std::unique_ptr<PDNode>>& nodes() const { return nodes_; }
@@ -238,6 +248,8 @@ class GraphPatternDetector {
void UniquePatterns(std::vector* subgraphs);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
+ // The intermediate PDNodes will be removed, so they can't be shared by
+ // multiple patterns.
void RemoveOverlappedMatch(std::vector* subgraphs);
// Validate whether the intermediate nodes are linked by external nodes.
@@ -257,64 +269,40 @@ class GraphPatternDetector {
// some helper methods.
-// Op's input.
-static bool VarLinksToOp(Node* node, const std::string& op_type) {
- for (auto* out : node->outputs) {
- if (out->IsOp() && out->Op()->Type() == op_type) {
- return true;
- }
- }
- return false;
-}
-
-// Op's output.
-static bool VarLinksFromOp(Node* node, const std::string& op_type) {
- for (auto* out : node->inputs) {
- if (out->IsOp() && out->Op()->Type() == op_type) {
- return true;
- }
- }
- return false;
-}
+// Tell if a var links to an Op
+bool VarLinksToOp(Node* node, const std::string& op_type);
+
+// Tell if a var is linked from an op of the given type
+bool VarLinksFromOp(Node* node, const std::string& op_type);
// Check whether a var node is an op node's nth input.
-static bool IsNthInput(Node* var, Node* op, const std::string& argument,
- size_t nth) {
- PADDLE_ENFORCE(var->IsVar());
- PADDLE_ENFORCE(op->IsOp());
- if (op->inputs.size() <= nth) return false;
- return var->Name() == op->Op()->Input(argument)[nth];
-}
-
-static bool IsNthOutput(Node* var, Node* op, const std::string& argument,
- size_t nth) {
- PADDLE_ENFORCE(var->IsVar());
- PADDLE_ENFORCE(op->IsOp());
- if (op->inputs.size() <= nth) return false;
- return var->Name() == op->Op()->Output(argument)[nth];
-}
-
-static void GraphSafeRemoveNodes(Graph* graph,
- const std::unordered_set& nodes) {
- for (auto* node : nodes) {
- graph->RemoveNode(const_cast(node));
- }
+bool IsNthInput(Node* var, Node* op, const std::string& argument, size_t nth);
- for (auto* node : graph->Nodes()) {
- for (auto it = node->inputs.begin(); it != node->inputs.end();) {
- if (nodes.count(*it)) {
- it = const_cast(node)->inputs.erase(it);
- } else
- it++;
- }
- for (auto it = node->outputs.begin(); it != node->outputs.end();) {
- if (nodes.count(*it)) {
- it = const_cast(node)->outputs.erase(it);
- } else
- it++;
- }
- }
-}
+// Tell whether a var node is an op node's nth output.
+bool IsNthOutput(Node* var, Node* op, const std::string& argument, size_t nth);
+
+// Safely remove some nodes from the graph; dangling edges are cleaned up automatically.
+void GraphSafeRemoveNodes(Graph* graph,
+ const std::unordered_set<const Node *>& nodes);
+
+// Some pre-defined patterns that can be reused in multiple passes.
+namespace patterns {
+
+// FC with bias
+// op: mul + elementwise_add
+// named nodes:
+// mul, elementwise_add
+// w, mul_out, bias, fc_out
+PDNode* FC(PDPattern* pattern, const std::string& name_scope, PDNode* x,
+ bool with_bias);
+
+PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
+
+} // namespace patterns
+
+#define IR_NODE_LINK_TO(a, b) \
+ a->outputs.push_back(b); \
+ b->inputs.push_back(a);
} // namespace ir
} // namespace framework
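
To make the new patterns namespace and IR_NODE_LINK_TO concrete, here is a hedged sketch of how a fuse pass could combine them, assuming a PDPattern* named pattern and a match handler as in the sketch after graph_pattern_detector.cc above; the scope name "fc_fuse", the retrieval keys (which follow the "name_scope/name" ids that patterns::FC assigns), and the fused op node are illustrative:

    // Build the pattern: x -> mul -> elementwise_add -> fc_out.
    auto* x = pattern->NewNode("fc_fuse", "x")
                  ->AsInput()
                  ->assert_is_op_input("mul", "X");
    patterns::FC(pattern, "fc_fuse", x, true /*with_bias*/);

    // Inside the match handler: look up named nodes and wire in the fused op.
    Node* w = subgraph.at(pattern->RetrieveNode("fc_fuse/w"));
    Node* out = subgraph.at(pattern->RetrieveNode("fc_fuse/fc_out"));
    // auto* fc_op = graph->CreateOpNode(&fc_desc);  // fused op, illustrative
    // IR_NODE_LINK_TO(w, fc_op);
    // IR_NODE_LINK_TO(fc_op, out);
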
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
index 7e5c86b033a7c69a306491cf4bf8d099018c5f19..6c466fb21fb46e09961dc874e9e39655f83d17c6 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
return node->IsOp() && (node->Name() == "op2" || node->Name() == "op3");
},
"OP0");
- auto* any_var = x.mutable_pattern()->NewNode(
- [](Node* node) { return node->IsVar(); }, "VAR");
+ auto* any_var = x.mutable_pattern()
+ ->NewNode([](Node* node) { return node->IsVar(); }, "VAR")
+ ->AsIntermediate();
auto* any_op1 = x.mutable_pattern()->NewNode(
[](Node* node) { return node->IsOp(); }, "OP1");
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.cc b/paddle/fluid/framework/ir/graph_viz_pass.cc
index 4c7ffe69e933de3d52c8f762a1eeb73de17e0561..31ed98db72c8fd4af8c970861d386687962001ce 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.cc
+++ b/paddle/fluid/framework/ir/graph_viz_pass.cc
@@ -50,20 +50,37 @@ std::unique_ptr GraphVizPass::ApplyImpl(
Dot dot;
- std::vector<Dot::Attr> op_attrs({Dot::Attr("style", "filled"),
- Dot::Attr("shape", "box"),
- Dot::Attr("fillcolor", "red")});
- std::vector<Dot::Attr> var_attrs({Dot::Attr("style", "filled,rounded"),
- // Dot::Attr("shape", "diamond"),
- Dot::Attr("fillcolor", "yellow")});
-
- std::vector<Dot::Attr> marked_op_attrs({Dot::Attr("style", "filled"),
- Dot::Attr("shape", "box"),
- Dot::Attr("fillcolor", "lightgray")});
- std::vector<Dot::Attr> marked_var_attrs(
- {Dot::Attr("style", "filled,rounded"),
- // Dot::Attr("shape", "diamond"),
- Dot::Attr("fillcolor", "lightgray")});
+ const std::vector<Dot::Attr> op_attrs({
+ Dot::Attr("style", "rounded,filled,bold"), //
+ Dot::Attr("shape", "box"), //
+ Dot::Attr("color", "#303A3A"), //
+ Dot::Attr("fontcolor", "#ffffff"), //
+ Dot::Attr("width", "1.3"), //
+ Dot::Attr("height", "0.84"), //
+ Dot::Attr("fontname", "Arial"), //
+ });
+ const std::vector<Dot::Attr> arg_attrs({
+ Dot::Attr("shape", "box"), //
+ Dot::Attr("style", "rounded,filled,bold"), //
+ Dot::Attr("fontname", "Arial"), //
+ Dot::Attr("fillcolor", "#999999"), //
+ Dot::Attr("color", "#dddddd"), //
+ });
+
+ const std::vector<Dot::Attr> param_attrs({
+ Dot::Attr("shape", "box"), //
+ Dot::Attr("style", "rounded,filled,bold"), //
+ Dot::Attr("fontname", "Arial"), //
+ Dot::Attr("color", "#148b97"), //
+ Dot::Attr("fontcolor", "#ffffff"), //
+ });
+
+ const std::vector<Dot::Attr> marked_op_attrs(
+ {Dot::Attr("style", "rounded,filled,bold"), Dot::Attr("shape", "box"),
+ Dot::Attr("fillcolor", "yellow")});
+ const std::vector<Dot::Attr> marked_var_attrs(
+ {Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"),
+ Dot::Attr("fillcolor", "yellow")});
auto marked_nodes = ConsumeMarkedNodes(graph.get());
// Create nodes
@@ -74,9 +91,17 @@ std::unique_ptr GraphVizPass::ApplyImpl(
marked_nodes.count(n) ? marked_op_attrs : op_attrs;
dot.AddNode(node_id, attr, node_id);
} else if (n->IsVar()) {
- decltype(op_attrs) attr =
- marked_nodes.count(n) ? marked_var_attrs : var_attrs;
- dot.AddNode(node_id, attr, node_id);
+ decltype(op_attrs)* attr;
+ if (marked_nodes.count(n)) {
+ attr = &marked_var_attrs;
+ } else if (const_cast<Node *>(n)->Var() &&
+ const_cast<Node *>(n)->Var()->Persistable()) {
+ attr = &param_attrs;
+ } else {
+ attr = &arg_attrs;
+ }
+
+ dot.AddNode(node_id, *attr, node_id);
}
node2dot[n] = node_id;
}
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.h b/paddle/fluid/framework/ir/graph_viz_pass.h
index 8d885cb9e4ee6e01de386b0f22423988dbe60ca6..e64916a5bb662e3b00cfe212f0bbbc537c7bc2cc 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.h
+++ b/paddle/fluid/framework/ir/graph_viz_pass.h
@@ -42,6 +42,13 @@ class GraphVizPass : public Pass {
marked_nodes_t ConsumeMarkedNodes(Graph* graph) const;
};
+static GraphVizPass::marked_nodes_t& GetMarkedNodes(Graph* graph) {
+ if (!graph->Has(kGraphvizMarkedNodeAttr)) {
+ graph->Set(kGraphvizMarkedNodeAttr, new GraphVizPass::marked_nodes_t);
+ }
+ return graph->Get<GraphVizPass::marked_nodes_t>(kGraphvizMarkedNodeAttr);
+}
+
} // namespace ir
} // namespace framework
} // namespace paddle
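
The helper above lazily creates the marked-nodes attribute, so any pass can highlight nodes in the next graph_viz_pass dump. A minimal sketch (the wrapper function is illustrative):

    // Highlight a node in subsequent graph_viz_pass output.
    void MarkForVisualization(Graph* graph, Node* node) {
      GetMarkedNodes(graph).insert(node);  // attribute is created on first use
    }
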
diff --git a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
index f885567da1965b997b2063e06c839af95b43e1e1..7713ed1eab88ee4fa16d52e7425075ae66f721a3 100644
--- a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
+++ b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
@@ -13,42 +13,41 @@
// limitations under the License.
#include
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
-class InferCleanGraphPass : public Pass {
+class InferCleanGraphPass : public FusePassBase {
public:
virtual ~InferCleanGraphPass() {}
protected:
std::unique_ptr ApplyImpl(std::unique_ptr graph) const {
+ FusePassBase::Init("original_graph", graph.get());
PADDLE_ENFORCE(graph.get());
auto is_valid_node = [](Node* x) {
return x && IsControlDepVar(*x) && x->IsVar() && !x->Var();
};
- std::unordered_set<Node *> invalid_nodes;
+ std::unordered_set<const Node *> invalid_nodes;
+ int valid_op = 0;
for (auto* node : graph->Nodes()) {
if (is_valid_node(node)) {
invalid_nodes.insert(node);
+ } else if (node->IsOp()) {
+ // Count the operators to help track the number of operators.
+ ++valid_op;
}
}
- // remove nodes from the graph.
- for (auto* node : invalid_nodes) {
- graph->RemoveNode(node);
- }
+ GraphSafeRemoveNodes(graph.get(), invalid_nodes);
- // clean edges.
- for (auto* node : graph->Nodes()) {
- CleanEdges(&node->inputs, invalid_nodes);
- CleanEdges(&node->outputs, invalid_nodes);
- }
+ AddStatis(valid_op);
return graph;
}
diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
index a776a898a5ee13b4dde12460dce71433268fb9d4..e1a441d09aaa3647c4b2a582210a2c7e2b64e0da 100644
--- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
@@ -219,16 +219,13 @@ std::unique_ptr SeqConcatFcFusePass::ApplyImpl(
op_desc.SetAttr("fc_activation", act->Op()->Type());
auto* op_node = graph->CreateOpNode(&op_desc);
-// Add links
-#define NODE_LINKS(a, b) \
- a->outputs.push_back(b); \
- b->inputs.push_back(a);
- NODE_LINKS(fc_w, op_node);
- NODE_LINKS(fc_bias, op_node);
- NODE_LINKS(concat_in0, op_node);
- NODE_LINKS(sequence_expand0_in, op_node);
- NODE_LINKS(sequence_expand1_in, op_node);
- NODE_LINKS(op_node, fc_out);
+ // Add links
+ IR_NODE_LINK_TO(fc_w, op_node);
+ IR_NODE_LINK_TO(fc_bias, op_node);
+ IR_NODE_LINK_TO(concat_in0, op_node);
+ IR_NODE_LINK_TO(sequence_expand0_in, op_node);
+ IR_NODE_LINK_TO(sequence_expand1_in, op_node);
+ IR_NODE_LINK_TO(op_node, fc_out);
// Clean nodes.
std::unordered_set<const Node *> marked_nodes;
@@ -241,7 +238,6 @@ std::unique_ptr SeqConcatFcFusePass::ApplyImpl(
marked_nodes.erase(sequence_expand0_in);
marked_nodes.erase(sequence_expand1_in);
marked_nodes.erase(fc_out);
-
GraphSafeRemoveNodes(graph, marked_nodes);
});
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index a4f6364ae5b7d832096c92e9c6d8b3e865713cff..2006e3b24f71d0ae32b4e2ae34f1a1e4d3a82f91 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -10,19 +10,19 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library(paddle_fluid_api
SRCS io.cc
- DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} graph_to_program_pass)
+ DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
# paddle_fluid_origin exclude inference api interface
cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
-if(NOT APPLE)
+#if(APPLE)
add_subdirectory(api)
-endif()
+#endif()
# Create static library
-cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api paddle_inference_api)
+cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api paddle_inference_api analysis_predictor)
if(NOT APPLE)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
@@ -32,6 +32,7 @@ endif()
# Create shared library
cc_library(paddle_fluid_shared SHARED
SRCS io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
DEPS ${fluid_modules} paddle_fluid_api)
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
index d43ecc722ea3c78541835fb3f5efc9a3529fbf11..f2e18a461fd221252e4a10262a13bc8e942f5988 100644
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -25,17 +25,16 @@ function (inference_analysis_test TARGET)
if(WITH_TESTING)
set(options "")
set(oneValueArgs "")
- set(multiValueArgs SRCS EXTRA_DEPS)
+ set(multiValueArgs SRCS ARGS EXTRA_DEPS)
cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
set(mem_opt "")
if(WITH_GPU)
set(mem_opt "--fraction_of_gpu_memory_to_use=0.5")
endif()
cc_test(${TARGET}
SRCS "${analysis_test_SRCS}"
- DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detector pass ${analysis_test_EXTRA_DEPS}
- ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model ${mem_opt})
+ DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS}
+ ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model ${mem_opt} ${analysis_test_ARGS})
set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
endif(WITH_TESTING)
endfunction(inference_analysis_test)
@@ -51,32 +50,19 @@ endfunction(inference_download_and_uncompress)
set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz")
set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz")
set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/ditu_rnn" CACHE PATH "Ditu RNN model and data root." FORCE)
-if (NOT EXISTS ${DITU_INSTALL_DIR})
+if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz")
inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz")
endif()
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
- EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
- analysis_predictor
- # ir
- fc_fuse_pass
- fc_lstm_fuse_pass
- seq_concat_fc_fuse_pass
- graph_viz_pass
- infer_clean_graph_pass
- graph_pattern_detector
- infer_clean_graph_pass
- attention_lstm_fuse_pass
- paddle_inference_api
- pass
- ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model
- --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
- --infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
+ EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
+ ARGS --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
+ --infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
-inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc EXTRA_DEPS paddle_inference_api)
-inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc EXTRA_DEPS paddle_fluid)
+inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
+inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc)
inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
@@ -88,13 +74,37 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
set(CHINESE_NER_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz")
set(CHINESE_NER_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz")
set(CHINESE_NER_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/chinese_ner" CACHE PATH "Chinese ner model and data root." FORCE)
-if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR})
+if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_MODEL_URL} "chinese_ner_model.tar.gz")
inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_DATA_URL} "chinese_ner-data.txt.tar.gz")
endif()
-inference_analysis_test(test_chinese_ner SRCS chinese_ner_tester.cc
+inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api
- ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model
- --infer_model=${CHINESE_NER_INSTALL_DIR}/model
+ ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model
--infer_data=${CHINESE_NER_INSTALL_DIR}/data.txt)
+
+set(LAC_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz")
+set(LAC_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz")
+set(LAC_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/lac" CACHE PATH "LAC model and data root." FORCE)
+if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
+ inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_MODEL_URL} "lac_model.tar.gz")
+ inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_DATA_URL} "lac_data.txt.tar.gz")
+endif()
+
+inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
+ EXTRA_DEPS paddle_inference_api paddle_fluid_api
+ ARGS --infer_model=${LAC_INSTALL_DIR}/model
+ --infer_data=${LAC_INSTALL_DIR}/data.txt)
+
+
+set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz")
+set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification" CACHE PATH "Text Classification model and data root." FORCE)
+
+if (NOT EXISTS ${TEXT_CLASSIFICATION_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
+ inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} "text-classification-Senta.tar.gz")
+endif()
+
+inference_analysis_test(test_text_classification SRCS test_text_classification.cc
+ EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
+ ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta)
diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index e6e63544ffa2de09e39b02769aaaf0793d6b1111..1fd884435d173800563ea37809003ed3aee16c7c 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include
+#include
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
@@ -41,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
public:
DfgPassManagerImpl() {
// TODO(Superjomn) set the key with pass reprs.
- LOG(INFO)
- << "-----------------------------------------------------------------";
- if (FLAGS_IA_enable_ir) {
- AddPass("fluid-to-ir-pass", new FluidToIrPass);
- } else {
+ if (!FLAGS_IA_enable_ir) {
AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
+ } else {
+ AddPass("fluid-to-ir-pass", new FluidToIrPass);
}
TryAddTensorRtPass();
AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
if (!FLAGS_IA_output_storage_path.empty()) {
AddPass("model-store-pass", new ModelStorePass);
}
- LOG(INFO)
- << "-----------------------------------------------------------------";
}
std::string repr() const override { return "dfg-pass-manager"; }
@@ -101,18 +98,15 @@ class DfgPassManagerImpl final : public DfgPassManager {
Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
void Analyzer::Run(Argument* argument) {
- // Ugly support fluid-to-ir-pass
- argument->Set(kFluidToIrPassesAttr,
- new std::vector<std::string>({
- // Manual update the passes here.
- "graph_viz_pass", //
- "infer_clean_graph_pass", "graph_viz_pass", //
- "attention_lstm_fuse_pass", "graph_viz_pass", //
- "fc_lstm_fuse_pass", "graph_viz_pass", //
- "seq_concat_fc_fuse_pass", "graph_viz_pass", //
- "fc_fuse_pass", "graph_viz_pass" //
-
- }));
+ std::vector<std::string> passes;
+ for (auto& pass : all_ir_passes_) {
+ if (!disabled_ir_passes_.count(pass)) {
+ passes.push_back(pass);
+ passes.push_back("graph_viz_pass"); // add graphviz for debug.
+ }
+ }
+ passes.push_back("graph_viz_pass");
+ argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
for (auto& x : data_) {
PADDLE_ENFORCE(x->Initialize(argument));
@@ -121,6 +115,11 @@ void Analyzer::Run(Argument* argument) {
}
}
+Analyzer& Analyzer::DisableIrPasses(const std::vector<std::string>& passes) {
+ disabled_ir_passes_.insert(passes.begin(), passes.end());
+ return *this;
+}
+
} // namespace analysis
} // namespace inference
} // namespace paddle
diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h
index 2e107c82dd50d5cf22797f4c82e69d302514f955..3fdd2b9ec7537c891a04efb3ca9a1d45075ffa5e 100644
--- a/paddle/fluid/inference/analysis/analyzer.h
+++ b/paddle/fluid/inference/analysis/analyzer.h
@@ -36,16 +36,10 @@ limitations under the License. */
*/
#include
+#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
-// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
-// flag if not available.
-DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
-DECLARE_string(IA_graphviz_log_root);
-DECLARE_string(IA_output_storage_path);
-DECLARE_bool(IA_enable_ir);
-
namespace paddle {
namespace inference {
namespace analysis {
@@ -57,7 +51,26 @@ class Analyzer : public OrderedRegistry {
void Run(Argument* argument);
+ Analyzer& DisableIrPasses(const std::vector<std::string>& passes);
+
DISABLE_COPY_AND_ASSIGN(Analyzer);
+
+ private:
+ // All available IR passes.
+ // The larger fusions come first, so that smaller operators are preferentially
+ // merged into larger fused ops; the smaller fusions applied later cannot break
+ // the patterns of the larger ones.
+ const std::vector<std::string> all_ir_passes_{{
+ // Manual update the passes here.
+ "infer_clean_graph_pass", //
+ "attention_lstm_fuse_pass", //
+ "fc_lstm_fuse_pass", //
+ "mul_lstm_fuse_pass", //
+ "seq_concat_fc_fuse_pass", //
+ "fc_fuse_pass", //
+ }};
+
+ std::unordered_set<std::string> disabled_ir_passes_;
};
} // namespace analysis
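
The new exclude-style configuration is chained in front of Run; a minimal sketch, assuming the Argument has already been populated by the caller (the disabled pass is only an example):

    Argument argument;  // assumed to be set up elsewhere
    Analyzer()
        .DisableIrPasses({"attention_lstm_fuse_pass"})  // skip just this fuse
        .Run(&argument);  // every other pass in all_ir_passes_ still runs
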
diff --git a/paddle/fluid/inference/analysis/analyzer_lac_tester.cc b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e2f7253ac04cac8457fa60a055e4ef2770aa874b
--- /dev/null
+++ b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
@@ -0,0 +1,199 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include
+#include
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/profiler.h"
+
+DEFINE_string(infer_model, "", "model path for LAC");
+DEFINE_string(infer_data, "", "data file for LAC");
+DEFINE_int32(batch_size, 1, "batch size.");
+DEFINE_int32(burning, 0, "Burning before repeat.");
+DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+struct DataRecord {
+ std::vector<int64_t> data;
+ std::vector<size_t> lod;
+ // for dataset and nextbatch
+ size_t batch_iter{0};
+ std::vector<std::vector<size_t>> batched_lods;
+ std::vector<std::vector<int64_t>> batched_datas;
+ std::vector<std::vector<int64_t>> datasets;
+ DataRecord() = default;
+ explicit DataRecord(const std::string &path, int batch_size = 1) {
+ Load(path);
+ Prepare(batch_size);
+ batch_iter = 0;
+ }
+ void Load(const std::string &path) {
+ std::ifstream file(path);
+ std::string line;
+ int num_lines = 0;
+ datasets.resize(0);
+ while (std::getline(file, line)) {
+ num_lines++;
+ std::vector<std::string> data;
+ split(line, ';', &data);
+ std::vector<int64_t> words_ids;
+ split_to_int64(data[1], ' ', &words_ids);
+ datasets.emplace_back(words_ids);
+ }
+ }
+ void Prepare(int bs) {
+ if (bs == 1) {
+ batched_datas = datasets;
+ for (auto one_sentence : datasets) {
+ batched_lods.push_back({0, one_sentence.size()});
+ }
+ } else {
+ std::vector<int64_t> one_batch;
+ std::vector<size_t> lod{0};
+ int bs_id = 0;
+ for (auto one_sentence : datasets) {
+ bs_id++;
+ one_batch.insert(one_batch.end(), one_sentence.begin(),
+ one_sentence.end());
+ lod.push_back(lod.back() + one_sentence.size());
+ if (bs_id == bs) {
+ bs_id = 0;
+ batched_datas.push_back(one_batch);
+ batched_lods.push_back(lod);
+ one_batch.clear();
+ one_batch.resize(0);
+ lod.clear();
+ lod.resize(0);
+ lod.push_back(0);
+ }
+ }
+ if (one_batch.size() != 0) {
+ batched_datas.push_back(one_batch);
+ batched_lods.push_back(lod);
+ }
+ }
+ }
+ DataRecord NextBatch() {
+ DataRecord data;
+ data.data = batched_datas[batch_iter];
+ data.lod = batched_lods[batch_iter];
+ batch_iter++;
+ if (batch_iter >= batched_datas.size()) {
+ batch_iter = 0;
+ }
+ return data;
+ }
+};
+void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
+ int batch_size) {
+ auto one_batch = data->NextBatch();
+ PaddleTensor input_tensor;
+ input_tensor.name = "word";
+ input_tensor.shape.assign({static_cast<int>(one_batch.data.size()), 1});
+ input_tensor.lod.assign({one_batch.lod});
+ input_tensor.dtype = PaddleDType::INT64;
+ TensorAssignData(&input_tensor, {one_batch.data});
+ PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
+ input_slots->assign({input_tensor});
+}
+static void PrintTime(const double latency, const int bs, const int repeat) {
+ LOG(INFO) << "===========profile result===========";
+ LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat
+ << ", avg latency: " << latency / repeat << "ms";
+ LOG(INFO) << "=====================================";
+}
+void BenchAllData(const std::string &model_path, const std::string &data_file,
+ const int batch_size, const int repeat) {
+ NativeConfig config;
+ config.model_dir = model_path;
+ config.use_gpu = false;
+ config.device = 0;
+ config.specify_input_name = true;
+ std::vector<PaddleTensor> input_slots, outputs_slots;
+ DataRecord data(data_file, batch_size);
+ auto predictor =
+ CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+ GetOneBatch(&input_slots, &data, batch_size);
+ for (int i = 0; i < FLAGS_burning; i++) {
+ predictor->Run(input_slots, &outputs_slots);
+ }
+ Timer timer;
+ double sum = 0;
+ for (int i = 0; i < repeat; i++) {
+ for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
+ GetOneBatch(&input_slots, &data, batch_size);
+ timer.tic();
+ predictor->Run(input_slots, &outputs_slots);
+ sum += timer.toc();
+ }
+ }
+ PrintTime(sum, batch_size, repeat);
+}
+const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25,
+ 25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43,
+ 44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39,
+ 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
+void TestLACPrediction(const std::string &model_path,
+ const std::string &data_file, const int batch_size,
+ const int repeat, bool test_all_data) {
+ if (test_all_data) {
+ BenchAllData(model_path, data_file, batch_size, repeat);
+ return;
+ }
+ NativeConfig config;
+ config.model_dir = model_path;
+ config.use_gpu = false;
+ config.device = 0;
+ config.specify_input_name = true;
+ std::vector<PaddleTensor> input_slots, outputs_slots;
+ DataRecord data(data_file, batch_size);
+ GetOneBatch(&input_slots, &data, batch_size);
+ auto predictor =
+ CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+ for (int i = 0; i < FLAGS_burning; i++) {
+ predictor->Run(input_slots, &outputs_slots);
+ }
+ Timer timer;
+ timer.tic();
+ for (int i = 0; i < repeat; i++) {
+ predictor->Run(input_slots, &outputs_slots);
+ }
+ PrintTime(timer.toc(), batch_size, repeat);
+ EXPECT_EQ(outputs_slots.size(), 1UL);
+ auto &out = outputs_slots[0];
+ size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+ [](int a, int b) { return a * b; });
+ size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
+ PADDLE_ENFORCE_GT(size, 0);
+ EXPECT_GE(size, batch1_size);
+ int64_t *pdata = static_cast<int64_t *>(out.data.data());
+ for (size_t i = 0; i < batch1_size; ++i) {
+ EXPECT_EQ(pdata[i], lac_ref_data[i]);
+ }
+}
+TEST(Analyzer_LAC, native) {
+ LOG(INFO) << "LAC with native";
+ TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size,
+ FLAGS_repeat, FLAGS_test_all_data);
+}
+} // namespace analysis
+} // namespace inference
+} // namespace paddle
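
A short worked example of the batching layout that DataRecord::Prepare builds and GetOneBatch feeds to the "word" tensor (the numbers are illustrative): with batch_size = 2 and two sentences of lengths 3 and 2, the word ids are concatenated and the LoD keeps cumulative offsets.

    // data  = {11, 12, 13, 21, 22};  // flattened word ids of both sentences
    // lod   = {0, 3, 5};             // sentence i spans [lod[i], lod[i+1])
    // shape = {5, 1}, dtype = INT64  // what GetOneBatch assigns above
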
diff --git a/paddle/fluid/inference/analysis/chinese_ner_tester.cc b/paddle/fluid/inference/analysis/analyzer_ner_tester.cc
similarity index 100%
rename from paddle/fluid/inference/analysis/chinese_ner_tester.cc
rename to paddle/fluid/inference/analysis/analyzer_ner_tester.cc
index 9088a29d504309bc2c7b96fd49a0bf44e7cf0da9..720a8811db75a91a5774a29dd95285eceabadf83 100644
--- a/paddle/fluid/inference/analysis/chinese_ner_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_ner_tester.cc
@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include "paddle/fluid/inference/analysis/analyzer.h"
#include
#include
#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc
index 1a65e85dd237eb1bacd3c15b4538a9835ec4b9e0..4cf26d3c70eafd951d14c26335416ec2c71c001d 100644
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -16,25 +16,27 @@
#include
#include
+#include // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
-#include "paddle/fluid/platform/profiler.h"
DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
DEFINE_int32(batch_size, 10, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_int32(num_threads, 1, "Run the inference program with multiple threads.");
namespace paddle {
namespace inference {
namespace analysis {
-using namespace framework;
+using namespace framework; // NOLINT
TEST(Analyzer, analysis_without_tensorrt) {
FLAGS_IA_enable_tensorrt_subgraph_engine = false;
@@ -219,39 +221,6 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data,
}
}
-std::string DescribeTensor(const PaddleTensor &tensor) {
- std::stringstream os;
- os << "Tensor [" << tensor.name << "]\n";
- os << " - type: ";
- switch (tensor.dtype) {
- case PaddleDType::FLOAT32:
- os << "float32";
- break;
- case PaddleDType::INT64:
- os << "int64";
- break;
- default:
- os << "unset";
- }
- os << '\n';
-
- os << " - shape: " << to_string(tensor.shape) << '\n';
- os << " - lod: ";
- for (auto &l : tensor.lod) {
- os << to_string(l) << "; ";
- }
- os << "\n";
- os << " - data: ";
-
- int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
- [](int a, int b) { return a * b; });
- for (int i = 0; i < dim; i++) {
- os << static_cast(tensor.data.data())[i] << " ";
- }
- os << '\n';
- return os.str();
-}
-
} // namespace
const float ditu_rnn_target_data[] = {
@@ -265,57 +234,97 @@ const float ditu_rnn_target_data[] = {
10.7286, 12.0595, 10.6672, 0, 0, 0, 0, 0,
93.5771, 3.84641, 0, 0, 0, 0, 0, 0,
169.426, 0, 0, 0, 0, 0, 0, 0};
+void CompareResult(const std::vector<PaddleTensor> &outputs,
+ const std::vector<PaddleTensor> &base_outputs) {
+ PADDLE_ENFORCE_GT(outputs.size(), 0);
+ PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+ for (size_t i = 0; i < outputs.size(); i++) {
+ auto &out = outputs[i];
+ auto &base_out = base_outputs[i];
+ size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+ [](int a, int b) { return a * b; });
+ size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+ 1, [](int a, int b) { return a * b; });
+ PADDLE_ENFORCE_EQ(size, size1);
+ PADDLE_ENFORCE_GT(size, 0);
+ float *data = static_cast<float *>(out.data.data());
+ float *base_data = static_cast<float *>(base_out.data.data());
+ for (size_t i = 0; i < size; i++) {
+ EXPECT_NEAR(data[i], base_data[i], 1e-3);
+ }
+ }
+}
// Test with a really complicate model.
-void TestDituRNNPrediction(const std::string &model_path,
- const std::string &data_path, int batch_size,
- bool use_analysis, bool activate_ir,
- int num_times = 1) {
- NativeConfig config;
+void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+ int num_threads) {
+ AnalysisConfig config;
config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
config.use_gpu = false;
config.device = 0;
config.specify_input_name = true;
+ config.enable_ir_optim = activate_ir;
+ PADDLE_ENFORCE(config.ir_mode ==
+ AnalysisConfig::IrPassMode::kExclude); // default
+ config.ir_passes.clear(); // Do not exclude any pass.
+ int batch_size = FLAGS_batch_size;
+ int num_times = FLAGS_repeat;
auto base_predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
auto predictor =
- CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kAnalysis>(config);
+ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+ config);
std::vector<PaddleTensor> input_slots;
- DataRecord data(data_path, batch_size);
+ DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
// Prepare inputs.
PrepareInputs(&input_slots, &data, batch_size);
std::vector<PaddleTensor> outputs, base_outputs;
base_predictor->Run(input_slots, &base_outputs);
- Timer timer;
- timer.tic();
- for (int i = 0; i < num_times; i++) {
- predictor->Run(input_slots, &outputs);
- }
LOG(INFO) << "===========profile result===========";
- LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
- << ", latency: " << timer.toc() / num_times << "ms";
- LOG(INFO) << "=====================================";
-
- PADDLE_ENFORCE_GT(outputs.size(), 0);
- PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
- for (size_t i = 0; i < outputs.size(); i++) {
- auto &out = outputs[i];
- auto &base_out = base_outputs[i];
- size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
- [](int a, int b) { return a * b; });
- size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
- 1, [](int a, int b) { return a * b; });
- PADDLE_ENFORCE_EQ(size, size1);
- PADDLE_ENFORCE_GT(size, 0);
- float *data = static_cast(out.data.data());
- float *base_data = static_cast(base_out.data.data());
- for (size_t i = 0; i < size; i++) {
- EXPECT_NEAR(data[i], base_data[i], 1e-3);
+ if (num_threads == 1) {
+ // Prepare inputs.
+ Timer timer;
+ timer.tic();
+ for (int i = 0; i < num_times; i++) {
+ predictor->Run(input_slots, &outputs);
+ }
+ PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+ CompareResult(outputs, base_outputs);
+ } else {
+ std::vector<std::thread> threads;
+ std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+ // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
+ // because AttentionLSTM's hard-coded node id will be damaged.
+ for (int tid = 0; tid < num_threads; ++tid) {
+ predictors.emplace_back(
+ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+ config));
+ }
+ for (int tid = 0; tid < num_threads; ++tid) {
+ threads.emplace_back([&, tid]() {
+ // Each thread should have local input_slots and outputs.
+ std::vector<PaddleTensor> input_slots;
+ DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+ PrepareInputs(&input_slots, &data, batch_size);
+ std::vector<PaddleTensor> outputs;
+ Timer timer;
+ timer.tic();
+ for (int i = 0; i < num_times; i++) {
+ predictors[tid]->Run(input_slots, &outputs);
+ }
+ PrintTime(batch_size, num_times, num_threads, tid,
+ timer.toc() / num_times);
+ CompareResult(outputs, base_outputs);
+ });
+ }
+ for (int i = 0; i < num_threads; ++i) {
+ threads[i].join();
}
}
+ LOG(INFO) << "=====================================";
if (use_analysis && activate_ir) {
AnalysisPredictor *analysis_predictor =
@@ -327,39 +336,45 @@ void TestDituRNNPrediction(const std::string &model_path,
LOG(INFO) << "fused " << item.first << " " << item.second;
}
- ASSERT_TRUE(fuse_statis.count("fc"));
- EXPECT_EQ(fuse_statis.at("fc"), 1);
- }
-}
+ int num_ops = 0;
+ for (auto &node :
+ analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
+ if (node->IsFunction()) {
+ ++num_ops;
+ }
+ }
+ LOG(INFO) << "has num ops: " << num_ops;
-// Directly infer with the original model.
-TEST(Analyzer, DituRNN_without_analysis) {
- TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
- FLAGS_batch_size, false, false, FLAGS_repeat);
+ ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+ EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+ EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM
+ EXPECT_EQ(num_ops,
+ 13); // After graph optimization, only 13 operators exist.
+ }
}
-// Inference with the original model with the analysis turned on, the analysis
-// module will transform the program to a data flow graph.
-TEST(Analyzer, DituRNN_with_analysis) {
- LOG(INFO) << "ditu rnn with analysis";
- TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
- FLAGS_batch_size, true, false, FLAGS_repeat);
+// Inference with analysis and IR, easy for profiling independently.
+TEST(Analyzer, DituRNN) {
+ TestDituRNNPrediction(true, true, FLAGS_num_threads);
}
-// Inference with analysis and IR. The IR module will fuse some large kernels.
-TEST(Analyzer, DituRNN_with_analysis_with_IR) {
- LOG(INFO) << "ditu rnn with analysis and IR fuse";
- TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
- FLAGS_batch_size, true, true, FLAGS_repeat);
+// Other DituRNN unit tests that exercise different combinations of use_analysis,
+// activate_ir and multi-threading.
+TEST(Analyzer, DituRNN_tests) {
+ int num_threads[2] = {1, 4};
+ for (auto i : num_threads) {
+ // Directly infer with the original model.
+ TestDituRNNPrediction(false, false, i);
+ // Inference with the original model with analysis turned on; the analysis
+ // module will transform the program into a data flow graph.
+ TestDituRNNPrediction(true, false, i);
+ // Inference with analysis and IR. The IR module will fuse some large
+ // kernels.
+ TestDituRNNPrediction(true, true, i);
+ }
}
} // namespace analysis
} // namespace inference
} // namespace paddle
-
-USE_PASS(fc_fuse_pass);
-USE_PASS(seq_concat_fc_fuse_pass);
-USE_PASS(fc_lstm_fuse_pass);
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
-USE_PASS(attention_lstm_fuse_pass);
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 3a4ffe967e67ab0487192bbf12d4d5a15f536aa3..e8fb0775b45761f64fd6fd28306c35b76d1e40c4 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -67,7 +67,7 @@ struct Argument {
PADDLE_ENFORCE(!attrs_.count(key), "Duplicate set Argument's attr [%s]",
key);
attrs_[key] = data;
- attr_deleters_[key] = [data, key, this]() {
+ attr_deleters_[key] = [data, key]() {
VLOG(3) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
VLOG(3) << "argument delete attr: " << key;
delete data;
diff --git a/paddle/fluid/inference/analysis/flags.h b/paddle/fluid/inference/analysis/flags.h
new file mode 100644
index 0000000000000000000000000000000000000000..717e543f01dfa071865a5c14c0b7679e65239daf
--- /dev/null
+++ b/paddle/fluid/inference/analysis/flags.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
+// flag if not available.
+DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
+DECLARE_string(IA_graphviz_log_root);
+DECLARE_string(IA_output_storage_path);
+DECLARE_bool(IA_enable_ir);
diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h
index 6731b1f759363eec5dd8645783212a72ace67b2f..3086085710d6e850ed27e82d2323690dfdd3ef19 100644
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h
@@ -15,6 +15,7 @@
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/pass.h"
@@ -85,9 +86,11 @@ class FluidToIrPass final : public DataFlowGraphPass {
new Scope *(&argument_->Get(ir::kParamScopeAttr)));
}
- const auto &ir_passes_to_apply =
- argument_->Get>(kFluidToIrPassesAttr);
- ir_passes.Apply(ir_passes_to_apply);
+ if (FLAGS_IA_enable_ir) {
+ const auto &ir_passes_to_apply =
+ argument_->Get>(kFluidToIrPassesAttr);
+ ir_passes.Apply(ir_passes_to_apply);
+ }
PADDLE_ENFORCE(argument_->main_dfg.get());
argument_->main_dfg->Build(ir_passes.graph());
diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc b/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
index 6a13c60e7b2ebf645b12d5ddf83ef6ab3a2e83bd..367c25805d05f8d10fb8341158760ac6356a5c48 100644
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
@@ -16,6 +16,7 @@
#include
#include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace paddle {
namespace inference {
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
} // namespace analysis
} // namespace inference
} // namespace paddle
-
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
-USE_PASS(attention_lstm_fuse_pass);
-USE_PASS(fc_lstm_fuse_pass);
-USE_PASS(seq_concat_fc_fuse_pass);
-USE_PASS(fc_fuse_pass);
diff --git a/paddle/fluid/inference/analysis/test_text_classification.cc b/paddle/fluid/inference/analysis/test_text_classification.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2913824f62301795aea967c22021b2af11f343c1
--- /dev/null
+++ b/paddle/fluid/inference/analysis/test_text_classification.cc
@@ -0,0 +1,109 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include <glog/logging.h>  // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
+#include
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/timer.h"
+
+DEFINE_string(infer_model, "", "Directory of the inference model.");
+DEFINE_string(infer_data, "", "Path of the dataset.");
+DEFINE_int32(batch_size, 1, "batch size.");
+DEFINE_int32(repeat, 1, "How many times to repeat run.");
+
+namespace paddle {
+
+template <typename T>
+std::string to_string(const std::vector<T> &vec) {
+ std::stringstream ss;
+ for (const auto &c : vec) {
+ ss << c << " ";
+ }
+ return ss.str();
+}
+
+void PrintTime(const double latency, const int bs, const int repeat) {
+ LOG(INFO) << "===========profile result===========";
+ LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat
+ << ", avg latency: " << latency / repeat << "ms";
+ LOG(INFO) << "=====================================";
+}
+
+void Main(int batch_size) {
+ // Three sequence inputs.
+ std::vector<PaddleTensor> input_slots(1);
+ // one batch starts
+ // data --
+ int64_t data0[] = {0, 1, 2};
+ for (auto &input : input_slots) {
+ input.data.Reset(data0, sizeof(data0));
+ input.shape = std::vector<int>({3, 1});
+ // dtype --
+ input.dtype = PaddleDType::INT64;
+ // LoD --
+ input.lod = std::vector<std::vector<size_t>>({{0, 3}});
+ }
+
+ // shape --
+ // Create Predictor --
+ AnalysisConfig config;
+ config.model_dir = FLAGS_infer_model;
+ config.use_gpu = false;
+ config.enable_ir_optim = true;
+ config.ir_passes.push_back("fc_lstm_fuse_pass");
+ auto predictor =
+ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+ config);
+
+ inference::Timer timer;
+ double sum = 0;
+ std::vector<PaddleTensor> output_slots;
+ for (int i = 0; i < FLAGS_repeat; i++) {
+ timer.tic();
+ CHECK(predictor->Run(input_slots, &output_slots));
+ sum += timer.toc();
+ }
+ PrintTime(sum, batch_size, FLAGS_repeat);
+
+ // Get output
+ LOG(INFO) << "get outputs " << output_slots.size();
+
+ for (auto &output : output_slots) {
+ LOG(INFO) << "output.shape: " << to_string(output.shape);
+ // no lod ?
+ CHECK_EQ(output.lod.size(), 0UL);
+ LOG(INFO) << "output.dtype: " << output.dtype;
+ std::stringstream ss;
+ for (int i = 0; i < 5; i++) {
+ ss << static_cast<float *>(output.data.data())[i] << " ";
+ }
+ LOG(INFO) << "output.data summary: " << ss.str();
+ // one batch ends
+ }
+}
+
+TEST(text_classification, basic) { Main(FLAGS_batch_size); }
+
+} // namespace paddle
+
+USE_PASS(fc_fuse_pass);
+USE_PASS(seq_concat_fc_fuse_pass);
+USE_PASS(fc_lstm_fuse_pass);
+USE_PASS(graph_viz_pass);
+USE_PASS(infer_clean_graph_pass);
+USE_PASS(attention_lstm_fuse_pass);
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index adfe4392448557a30cd834022b9a5d21d9086b95..ea00bf364951b0a4304b380df492d00e84451136 100644
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -18,10 +18,7 @@ if(APPLE)
endif(APPLE)
-set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
- graph_viz_pass fc_fuse_pass
- infer_clean_graph_pass
- )
+set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager ${GLOB_PASS_LIB})
if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
@@ -47,7 +44,19 @@ function(inference_api_test TARGET_NAME)
endfunction(inference_api_test)
cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor)
-cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api)
+cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api
+ analysis
+ ir_pass_manager
+ pass
+ fc_fuse_pass
+ fc_lstm_fuse_pass
+ seq_concat_fc_fuse_pass
+ graph_viz_pass
+ infer_clean_graph_pass
+ graph_pattern_detector
+ infer_clean_graph_pass
+ attention_lstm_fuse_pass
+ )
cc_test(test_paddle_inference_api
SRCS api_tester.cc
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 33862232bdaae817b9ca72879605386c32ed3e8b..79eeea88ea83ad862b5e2ac1390dae377b676685 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -14,10 +14,13 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include
+#include
+#include
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle {
@@ -27,10 +30,11 @@ bool AnalysisPredictor::Init(
VLOG(3) << "Predictor::init()";
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
+ LOG(WARNING) << "ir optimize only supports CPU currently";
+ config_.enable_ir_optim = false;
} else {
place_ = paddle::platform::CPUPlace();
}
- PADDLE_ENFORCE(!parent_scope);
if (parent_scope) {
scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope());
@@ -72,7 +76,7 @@ bool AnalysisPredictor::Init(
void AnalysisPredictor::OptimizeInferenceProgram() {
LOG(INFO) << "optimize begin";
- FLAGS_IA_enable_ir = true;
+ FLAGS_IA_enable_ir = config_.enable_ir_optim;
FLAGS_IA_enable_tensorrt_subgraph_engine = false;
FLAGS_IA_output_storage_path = ""; // Don't output the model.
// Analyze inference_program
@@ -89,24 +93,26 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
argument_.origin_program_desc.reset(
new ProgramDesc(*inference_program_->Proto()));
- Analyzer().Run(&argument_);
+ PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude,
+ "Only kExclude is supported yet.");
+ Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_);
+
CHECK(argument_.transformed_program_desc);
VLOG(5) << "to prepare executor";
- // LOG(INFO) << "transformed_parogram_desc " <<
- // argument.transformed_program_desc->DebugString();
inference_program_.reset(
new framework::ProgramDesc(*argument_.transformed_program_desc));
- PADDLE_ENFORCE(argument_.Has(framework::ir::kParamScopeAttr));
- // Update scope.
- scope_.reset(
- argument_.Release(framework::ir::kParamScopeAttr));
- LOG(INFO) << "optimize end ==";
+ if (argument_.Has(framework::ir::kParamScopeAttr)) {
+ // Update scope.
+ scope_.reset(
+ argument_.Release(framework::ir::kParamScopeAttr));
+ }
+ LOG(INFO) << "== optimize end ==";
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
- NativeConfig, PaddleEngineKind::kAnalysis>(const NativeConfig& config) {
- VLOG(3) << "create NativePredictor";
+ AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) {
+ VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memeroy
PADDLE_ENFORCE_GT(
@@ -133,7 +139,3 @@ std::unique_ptr CreatePaddlePredictor<
}
} // namespace paddle
-
-USE_PASS(fc_fuse_pass);
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index e32b6185f6044ab3577bde0a8f8dcf2391688aa8..e53925366e9214cd60422efe56884751297c15e5 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include
+#include
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
@@ -28,7 +30,7 @@ using framework::proto::ProgramDesc;
*/
class AnalysisPredictor : public NativePaddlePredictor {
public:
- explicit AnalysisPredictor(const NativeConfig& config)
+ explicit AnalysisPredictor(const AnalysisConfig& config)
: NativePaddlePredictor(config), config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope);
@@ -44,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
Argument& analysis_argument() { return argument_; }
private:
- NativeConfig config_;
+ AnalysisConfig config_;
Argument argument_;
};
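
For reference, a minimal sketch of driving the new predictor through AnalysisConfig; the model path is a placeholder and only fields exercised in this patch are shown:

    AnalysisConfig config;
    config.model_dir = "/path/to/model";  // or prog_file + param_file
    config.use_gpu = false;               // IR optimization is CPU-only for now
    config.enable_ir_optim = true;
    // Under the default kExclude mode, passes listed here are skipped.
    config.ir_passes.push_back("attention_lstm_fuse_pass");
    auto predictor =
        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(config);
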
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 530274f0c9262b6ed0e43766606585c8459eabb9..bd9b4b1a814f995e3979105f5b9830b95fd8ea7d 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include
#include
#include