diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c7eb260aea8478f4833cb79253f4481e10b8685..e8ea828dd2a25f5f47b03e92ae86e083d4425dc9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,7 +39,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
-option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
+option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 585db019d521b1699baadfae31ef95b5059c71b4..33ef6860e1d38f4e87c4431addf43f9f8a655fc2 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -186,6 +186,11 @@ function(cc_library TARGET_NAME)
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
if (cc_library_DEPS)
+ # No need to link libwarpctc.so; keep it only as a build-order dependency.
+ if ("${cc_library_DEPS};" MATCHES "warpctc;")
+ list(REMOVE_ITEM cc_library_DEPS warpctc)
+ add_dependencies(${TARGET_NAME} warpctc)
+ endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
endif()
@@ -224,12 +229,18 @@ function(cc_test TARGET_NAME)
if(WITH_TESTING)
set(options "")
set(oneValueArgs "")
- set(multiValueArgs SRCS DEPS)
+ set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
- target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+ # Support linking flags: --whole-archive (Linux) / -force_load (macOS)
+ target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+ if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
+ list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
+ endif()
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
- add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+ add_test(NAME ${TARGET_NAME}
+ COMMAND ${TARGET_NAME} ${cc_test_ARGS}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endfunction(cc_test)
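
For illustration, here is a hypothetical test (not part of this patch) that consumes a flag forwarded through the new `ARGS` parameter. It assumes gflags and gtest, which `cc_test` already links, and that `paddle_gtest_main` parses command-line flags before running the tests:

```cpp
#include <gflags/gflags.h>
#include <gtest/gtest.h>

// Registered with, e.g.: cc_test(demo_test SRCS demo_test.cc ARGS --iterations=3)
DEFINE_int32(iterations, 1, "How many times the loop body runs");

TEST(Demo, HonorsForwardedFlag) {
  for (int i = 0; i < FLAGS_iterations; ++i) {
    EXPECT_GE(FLAGS_iterations, 1);  // trivially true; shows the flag is visible
  }
}
```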
@@ -457,12 +468,12 @@ endfunction()
function(py_test TARGET_NAME)
if(WITH_TESTING)
- set(options STATIC static SHARED shared)
+ set(options "")
set(oneValueArgs "")
- set(multiValueArgs SRCS DEPS ARGS)
+ set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
- COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python
+ COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
diff --git a/doc/api/v2/fluid/data_feeder.rst b/doc/api/v2/fluid/data_feeder.rst
index 0fa78f7dfb04c13be7eb83b7fd35cb03f2f4a7fa..a591c7334fd31c98a94b50a4344f251560a0f2f9 100644
--- a/doc/api/v2/fluid/data_feeder.rst
+++ b/doc/api/v2/fluid/data_feeder.rst
@@ -1,9 +1,14 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
===========
-DataFeeder
+data_feeder
===========
DataFeeder
------------
-.. automodule:: paddle.v2.fluid.data_feeder
- :members: DataFeeder
+----------
+
+.. autoclass:: paddle.v2.fluid.data_feeder.DataFeeder
+ :members:
:noindex:
+
diff --git a/doc/api/v2/fluid/evaluator.rst b/doc/api/v2/fluid/evaluator.rst
index a23f3301d0331e0ea3733f06444515eb4680cd31..00dcecfd628a35d83d1c596bf0aea819a1705862 100644
--- a/doc/api/v2/fluid/evaluator.rst
+++ b/doc/api/v2/fluid/evaluator.rst
@@ -1,9 +1,21 @@
-===========
-Evaluator
-===========
-
-Evaluator
------------
-.. automodule:: paddle.v2.fluid.evaluator
- :members: Evaluator
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+=========
+evaluator
+=========
+
+Accuracy
+--------
+
+.. autoclass:: paddle.v2.fluid.evaluator.Accuracy
+ :members:
:noindex:
+
+ChunkEvaluator
+--------------
+
+.. autoclass:: paddle.v2.fluid.evaluator.ChunkEvaluator
+ :members:
+ :noindex:
+
diff --git a/doc/api/v2/fluid/executor.rst b/doc/api/v2/fluid/executor.rst
index 3a283538c120cfa1ef646c390bb71c6251c23675..a028f6283f2ca333bdf6c9857a98661c0222b41e 100644
--- a/doc/api/v2/fluid/executor.rst
+++ b/doc/api/v2/fluid/executor.rst
@@ -1,9 +1,32 @@
-===========
-Executor
-===========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+========
+executor
+========
Executor
+--------
+
+.. autoclass:: paddle.v2.fluid.executor.Executor
+ :members:
+ :noindex:
+
+global_scope
+------------
+
+.. autofunction:: paddle.v2.fluid.executor.global_scope
+ :noindex:
+
+scope_guard
-----------
-.. automodule:: paddle.v2.fluid.executor
- :members: Executor
+
+.. autofunction:: paddle.v2.fluid.executor.scope_guard
+ :noindex:
+
+switch_scope
+------------
+
+.. autofunction:: paddle.v2.fluid.executor.switch_scope
:noindex:
+
diff --git a/doc/api/v2/fluid/gen_doc.py b/doc/api/v2/fluid/gen_doc.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2147fd3f7ea635d8f14210fbcd1a568ee2230ee
--- /dev/null
+++ b/doc/api/v2/fluid/gen_doc.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import argparse
+import sys
+import types
+
+import paddle.v2.fluid as fluid
+
+
+def parse_arg():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--submodules', nargs="*")
+ parser.add_argument(
+ 'module', type=str, help='Generate the documentation of which module')
+ return parser.parse_args()
+
+
+class DocGenerator(object):
+ def __init__(self, module_name, stream=sys.stdout):
+ self.stream = stream
+ self.module_name = module_name
+ if not hasattr(fluid, module_name):
+ raise ValueError("Cannot find fluid.{0}".format(module_name))
+ else:
+ self.module = getattr(fluid, module_name)
+ self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+''')
+
+ self._print_header_(module_name, dot='=', is_title=True)
+
+ def print_submodule(self, submodule_name):
+ submodule = getattr(self.module, submodule_name, None)
+ if submodule is None:
+ raise ValueError("Cannot find submodule {0}".format(submodule_name))
+ self.print_section(submodule_name)
+
+ for item in submodule.__all__:
+ self.print_item(item)
+
+ def print_current_module(self):
+ for item in self.module.__all__:
+ self.print_item(item)
+
+ def print_section(self, name):
+ self._print_header_(name, dot='=', is_title=False)
+
+ def print_item(self, name):
+ item = getattr(self.module, name)
+ if isinstance(item, types.TypeType):
+ self.print_class(name)
+ elif isinstance(item, types.FunctionType):
+ self.print_method(name)
+ else:
+ raise RuntimeError("Unsupported item {0}".format(name))
+
+ def print_class(self, name):
+ self._print_header_(name, dot='-', is_title=False)
+ self.stream.write('''.. autoclass:: paddle.v2.fluid.{0}.{1}
+ :members:
+ :noindex:
+
+'''.format(self.module_name, name))
+
+ def print_method(self, name):
+ self._print_header_(name, dot='-', is_title=False)
+ self.stream.write('''.. autofunction:: paddle.v2.fluid.{0}.{1}
+ :noindex:
+
+'''.format(self.module_name, name))
+
+ def _print_header_(self, name, dot, is_title):
+ dot_line = dot * len(name)
+ if is_title:
+ self.stream.write(dot_line)
+ self.stream.write('\n')
+ self.stream.write(name)
+ self.stream.write('\n')
+ self.stream.write(dot_line)
+ self.stream.write('\n')
+ self.stream.write('\n')
+
+
+def main():
+ args = parse_arg()
+ gen = DocGenerator(args.module)
+ if args.submodules is None:
+ gen.print_current_module()
+ else:
+ for submodule_name in args.submodules:
+ gen.print_submodule(submodule_name)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/doc/api/v2/fluid/gen_doc.sh b/doc/api/v2/fluid/gen_doc.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ba7b7ba8e51399deb852b0a7c8ddd3128f521e85
--- /dev/null
+++ b/doc/api/v2/fluid/gen_doc.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst
+
+for module in data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer
+do
+ python gen_doc.py ${module} > ${module}.rst
+done
diff --git a/doc/api/v2/fluid/initializer.rst b/doc/api/v2/fluid/initializer.rst
index 8f587837e9873370722062404f511654a9460587..c38be033fff2997930525f51c93995db09daa2b6 100644
--- a/doc/api/v2/fluid/initializer.rst
+++ b/doc/api/v2/fluid/initializer.rst
@@ -1,50 +1,35 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
===========
-Initializer
+initializer
===========
+Constant
+--------
-
-Initializer
------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: Initializer
- :noindex:
-
-
-
-ConstantInitializer
--------------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: ConstantInitializer
+.. autoclass:: paddle.v2.fluid.initializer.Constant
+ :members:
:noindex:
+Uniform
+-------
-
-UniformInitializer
-------------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: UniformInitializer
- :noindex:
-
-
-
-NormalInitializer
------------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: NormalInitializer
+.. autoclass:: paddle.v2.fluid.initializer.Uniform
+ :members:
:noindex:
+Normal
+------
-XavierInitializer
------------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: XavierInitializer
+.. autoclass:: paddle.v2.fluid.initializer.Normal
+ :members:
:noindex:
+Xavier
+------
-MSRAInitializer
----------------
-.. automodule:: paddle.v2.fluid.initializer
- :members: MSRAInitializer
+.. autoclass:: paddle.v2.fluid.initializer.Xavier
+ :members:
:noindex:
diff --git a/doc/api/v2/fluid/io.rst b/doc/api/v2/fluid/io.rst
index 67f68c4e9e16b379207b8de114cdf769e056f78e..37c9c273e369532e8ff596e9649cb695a98a2505 100644
--- a/doc/api/v2/fluid/io.rst
+++ b/doc/api/v2/fluid/io.rst
@@ -1,10 +1,61 @@
-===========
-IO
-===========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+==
+io
+==
+save_vars
+---------
-is_parameter
+.. autofunction:: paddle.v2.fluid.io.save_vars
+ :noindex:
+
+save_params
-----------
-.. autofunction:: paddle.v2.fluid.io.is_parameter
+
+.. autofunction:: paddle.v2.fluid.io.save_params
+ :noindex:
+
+save_persistables
+-----------------
+
+.. autofunction:: paddle.v2.fluid.io.save_persistables
+ :noindex:
+
+load_vars
+---------
+
+.. autofunction:: paddle.v2.fluid.io.load_vars
+ :noindex:
+
+load_params
+-----------
+
+.. autofunction:: paddle.v2.fluid.io.load_params
:noindex:
+
+load_persistables
+-----------------
+
+.. autofunction:: paddle.v2.fluid.io.load_persistables
+ :noindex:
+
+save_inference_model
+--------------------
+
+.. autofunction:: paddle.v2.fluid.io.save_inference_model
+ :noindex:
+
+load_inference_model
+--------------------
+
+.. autofunction:: paddle.v2.fluid.io.load_inference_model
+ :noindex:
+
+get_inference_program
+---------------------
+
+.. autofunction:: paddle.v2.fluid.io.get_inference_program
+ :noindex:
+
diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 231ec2d4ba102a5d31c47cbc7a5d484ef17a7f3a..e24613b94b422b7cdf9c6383c359fa92a4faf6ff 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -1,546 +1,799 @@
-==========
-Layers
-==========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+======
+layers
+======
-fc
----
-.. autofunction:: paddle.v2.fluid.layers.fc
+control_flow
+============
+
+split_lod_tensor
+----------------
+
+.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor
:noindex:
-embedding
----------
-.. autofunction:: paddle.v2.fluid.layers.embedding
+merge_lod_tensor
+----------------
+
+.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
:noindex:
-dynamic_lstm
-------------
-.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
+BlockGuard
+----------
+
+.. autoclass:: paddle.v2.fluid.layers.BlockGuard
+ :members:
:noindex:
-dynamic_lstmp
--------------
-.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
+BlockGuardWithCompletion
+------------------------
+
+.. autoclass:: paddle.v2.fluid.layers.BlockGuardWithCompletion
+ :members:
:noindex:
-dynamic_gru
------------
-.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
+StaticRNNMemoryLink
+-------------------
+
+.. autoclass:: paddle.v2.fluid.layers.StaticRNNMemoryLink
+ :members:
:noindex:
-data
-----
-.. autofunction:: paddle.v2.fluid.layers.data
+WhileGuard
+----------
+
+.. autoclass:: paddle.v2.fluid.layers.WhileGuard
+ :members:
:noindex:
-mean
-----
-.. autofunction:: paddle.v2.fluid.layers.mean
+While
+-----
+
+.. autoclass:: paddle.v2.fluid.layers.While
+ :members:
:noindex:
-mul
----
-.. autofunction:: paddle.v2.fluid.layers.mul
+lod_rank_table
+--------------
+
+.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
:noindex:
-elementwise_add
----------------
-.. autofunction:: paddle.v2.fluid.layers.elementwise_add
+max_sequence_len
+----------------
+
+.. autofunction:: paddle.v2.fluid.layers.max_sequence_len
:noindex:
-elementwise_sub
----------------
-.. autofunction:: paddle.v2.fluid.layers.elementwise_sub
+topk
+----
+
+.. autofunction:: paddle.v2.fluid.layers.topk
:noindex:
-elementwise_mul
----------------
-.. autofunction:: paddle.v2.fluid.layers.elementwise_mul
+lod_tensor_to_array
+-------------------
+
+.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
:noindex:
-elementwise_div
----------------
-.. autofunction:: paddle.v2.fluid.layers.elementwise_div
+array_to_lod_tensor
+-------------------
+
+.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
:noindex:
+increment
+---------
-dropout
--------
-.. autofunction:: paddle.v2.fluid.layers.dropout
+.. autofunction:: paddle.v2.fluid.layers.increment
:noindex:
+array_write
+-----------
-reshape
---------
-.. autofunction:: paddle.v2.fluid.layers.reshape
+.. autofunction:: paddle.v2.fluid.layers.array_write
:noindex:
+create_array
+------------
-sigmoid
+.. autofunction:: paddle.v2.fluid.layers.create_array
+ :noindex:
+
+less_than
---------
-.. autofunction:: paddle.v2.fluid.layers.sigmoid
+
+.. autofunction:: paddle.v2.fluid.layers.less_than
:noindex:
+array_read
+----------
-scale
----------
-.. autofunction:: paddle.v2.fluid.layers.scale
+.. autofunction:: paddle.v2.fluid.layers.array_read
+ :noindex:
+
+shrink_memory
+-------------
+
+.. autofunction:: paddle.v2.fluid.layers.shrink_memory
:noindex:
+array_length
+------------
-transpose
+.. autofunction:: paddle.v2.fluid.layers.array_length
+ :noindex:
+
+IfElse
+------
+
+.. autoclass:: paddle.v2.fluid.layers.IfElse
+ :members:
+ :noindex:
+
+DynamicRNN
+----------
+
+.. autoclass:: paddle.v2.fluid.layers.DynamicRNN
+ :members:
+ :noindex:
+
+ConditionalBlock
+----------------
+
+.. autoclass:: paddle.v2.fluid.layers.ConditionalBlock
+ :members:
+ :noindex:
+
+StaticRNN
---------
-.. autofunction:: paddle.v2.fluid.layers.transpose
+
+.. autoclass:: paddle.v2.fluid.layers.StaticRNN
+ :members:
:noindex:
+reorder_lod_tensor_by_rank
+--------------------------
-sigmoid_cross_entropy_with_logits
----------------------------------
-.. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits
+.. autofunction:: paddle.v2.fluid.layers.reorder_lod_tensor_by_rank
:noindex:
+ParallelDo
+----------
-cast
+.. autoclass:: paddle.v2.fluid.layers.ParallelDo
+ :members:
+ :noindex:
+
+Print
+-----
+
+.. autofunction:: paddle.v2.fluid.layers.Print
+ :noindex:
+
+device
+======
+
+get_places
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.get_places
+ :noindex:
+
+io
+==
+
+data
----
-.. autofunction:: paddle.v2.fluid.layers.cast
+
+.. autofunction:: paddle.v2.fluid.layers.data
:noindex:
+BlockGuardServ
+--------------
-concat
--------
-.. autofunction:: paddle.v2.fluid.layers.concat
+.. autoclass:: paddle.v2.fluid.layers.BlockGuardServ
+ :members:
:noindex:
+ListenAndServ
+-------------
-sums
+.. autoclass:: paddle.v2.fluid.layers.ListenAndServ
+ :members:
+ :noindex:
+
+Send
----
-.. autofunction:: paddle.v2.fluid.layers.sums
+
+.. autofunction:: paddle.v2.fluid.layers.Send
:noindex:
+nn
+==
-linear_chain_crf
-----------------
-.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf
+fc
+--
+
+.. autofunction:: paddle.v2.fluid.layers.fc
:noindex:
+embedding
+---------
-assign
--------
.. autofunction:: paddle.v2.fluid.layers.embedding
:noindex:
+dynamic_lstm
+------------
-split_lod_tensor
-----------------
-.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor
+.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
:noindex:
+dynamic_lstmp
+-------------
-merge_lod_tensor
+.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
+ :noindex:
+
+dynamic_gru
+-----------
+
+.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
+ :noindex:
+
+gru_unit
+--------
+
+.. autofunction:: paddle.v2.fluid.layers.gru_unit
+ :noindex:
+
+linear_chain_crf
----------------
-.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
+
+.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf
+ :noindex:
+
+crf_decoding
+------------
+
+.. autofunction:: paddle.v2.fluid.layers.crf_decoding
:noindex:
cos_sim
---------
+-------
+
.. autofunction:: paddle.v2.fluid.layers.cos_sim
:noindex:
-
cross_entropy
-------------
+
.. autofunction:: paddle.v2.fluid.layers.cross_entropy
:noindex:
-
-
square_error_cost
-----------------
+
.. autofunction:: paddle.v2.fluid.layers.square_error_cost
:noindex:
-
accuracy
----------
+--------
+
.. autofunction:: paddle.v2.fluid.layers.accuracy
:noindex:
+chunk_eval
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.chunk_eval
+ :noindex:
sequence_conv
-------------
+
.. autofunction:: paddle.v2.fluid.layers.sequence_conv
:noindex:
-
conv2d
------
+
.. autofunction:: paddle.v2.fluid.layers.conv2d
:noindex:
-
sequence_pool
-------------
+
.. autofunction:: paddle.v2.fluid.layers.sequence_pool
:noindex:
+pool2d
+------
-sequence_first_step
--------------------
-.. autofunction:: paddle.v2.fluid.layers.sequence_first_step
+.. autofunction:: paddle.v2.fluid.layers.pool2d
:noindex:
+batch_norm
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.batch_norm
+ :noindex:
-sequence_last_step
+beam_search_decode
------------------
-.. autofunction:: paddle.v2.fluid.layers.sequence_last_step
+
+.. autofunction:: paddle.v2.fluid.layers.beam_search_decode
:noindex:
+conv2d_transpose
+----------------
-pool2d
-------
-.. autofunction:: paddle.v2.fluid.layers.pool2d
+.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex:
+sequence_expand
+---------------
-batch_norm
+.. autofunction:: paddle.v2.fluid.layers.sequence_expand
+ :noindex:
+
+lstm_unit
+---------
+
+.. autofunction:: paddle.v2.fluid.layers.lstm_unit
+ :noindex:
+
+reduce_sum
----------
-.. autofunction:: paddle.v2.fluid.layers.batch_norm
+
+.. autofunction:: paddle.v2.fluid.layers.reduce_sum
+ :noindex:
+
+reduce_mean
+-----------
+
+.. autofunction:: paddle.v2.fluid.layers.reduce_mean
:noindex:
+reduce_max
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.reduce_max
+ :noindex:
-beam_search_decode
+reduce_min
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.reduce_min
+ :noindex:
+
+sequence_first_step
+-------------------
+
+.. autofunction:: paddle.v2.fluid.layers.sequence_first_step
+ :noindex:
+
+sequence_last_step
------------------
-.. autofunction:: paddle.v2.fluid.layers.beam_search_decode
+
+.. autofunction:: paddle.v2.fluid.layers.sequence_last_step
+ :noindex:
+
+dropout
+-------
+
+.. autofunction:: paddle.v2.fluid.layers.dropout
:noindex:
+split
+-----
-lod_rank_table
---------------
-.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
+.. autofunction:: paddle.v2.fluid.layers.split
:noindex:
+ctc_greedy_decoder
+------------------
-max_sequence_len
-----------------
-.. autofunction:: paddle.v2.fluid.layers.max_sequence_len
+.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder
:noindex:
+edit_distance
+-------------
-topk
------
-.. autofunction:: paddle.v2.fluid.layers.topk
+.. autofunction:: paddle.v2.fluid.layers.edit_distance
:noindex:
+l2_normalize
+------------
-lod_tensor_to_array
--------------------
-.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
+.. autofunction:: paddle.v2.fluid.layers.l2_normalize
:noindex:
+matmul
+------
-
-array_to_lod_tensor
--------------------
-.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
+.. autofunction:: paddle.v2.fluid.layers.matmul
:noindex:
+warpctc
+-------
+.. autofunction:: paddle.v2.fluid.layers.warpctc
+ :noindex:
+sequence_reshape
+----------------
-fill_constant
--------------
-.. autofunction:: paddle.v2.fluid.layers.fill_constant
+.. autofunction:: paddle.v2.fluid.layers.sequence_reshape
:noindex:
+transpose
+---------
+.. autofunction:: paddle.v2.fluid.layers.transpose
+ :noindex:
-fill_constant_batch_size_like
------------------------------
-.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
+im2sequence
+-----------
+
+.. autofunction:: paddle.v2.fluid.layers.im2sequence
:noindex:
+nce
+---
-ones
-----
-.. autofunction:: paddle.v2.fluid.layers.ones
+.. autofunction:: paddle.v2.fluid.layers.nce
:noindex:
+beam_search
+-----------
-zeros
------
-.. autofunction:: paddle.v2.fluid.layers.zeros
+.. autofunction:: paddle.v2.fluid.layers.beam_search
:noindex:
+row_conv
+--------
-increment
----------
-.. autofunction:: paddle.v2.fluid.layers.increment
+.. autofunction:: paddle.v2.fluid.layers.row_conv
:noindex:
+multiplex
+---------
-array_write
------------
-.. autofunction:: paddle.v2.fluid.layers.array_write
+.. autofunction:: paddle.v2.fluid.layers.multiplex
:noindex:
+ops
+===
+mean
+----
-create_array
-------------
-.. autofunction:: paddle.v2.fluid.layers.create_array
+.. autofunction:: paddle.v2.fluid.layers.mean
:noindex:
+mul
+---
-less_than
----------
-.. autofunction:: paddle.v2.fluid.layers.less_than
+.. autofunction:: paddle.v2.fluid.layers.mul
:noindex:
+reshape
+-------
-array_read
-----------
-.. autofunction:: paddle.v2.fluid.layers.array_read
+.. autofunction:: paddle.v2.fluid.layers.reshape
:noindex:
+scale
+-----
-shrink_memory
---------------
-.. autofunction:: paddle.v2.fluid.layers.shrink_memory
+.. autofunction:: paddle.v2.fluid.layers.scale
:noindex:
+sigmoid_cross_entropy_with_logits
+---------------------------------
-array_length
--------------
-.. autofunction:: paddle.v2.fluid.layers.array_length
+.. autofunction:: paddle.v2.fluid.layers.sigmoid_cross_entropy_with_logits
:noindex:
+elementwise_add
+---------------
-conv2d_transpose
-----------------
-.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
+.. autofunction:: paddle.v2.fluid.layers.elementwise_add
:noindex:
-
-sequence_expand
+elementwise_div
---------------
-.. autofunction:: paddle.v2.fluid.layers.sequence_expand
+
+.. autofunction:: paddle.v2.fluid.layers.elementwise_div
:noindex:
+elementwise_sub
+---------------
-gru_unit
---------
-.. autofunction:: paddle.v2.fluid.layers.gru_unit
+.. autofunction:: paddle.v2.fluid.layers.elementwise_sub
:noindex:
+elementwise_mul
+---------------
-lstm_unit
----------
-.. autofunction:: paddle.v2.fluid.layers.lstm_unit
+.. autofunction:: paddle.v2.fluid.layers.elementwise_mul
:noindex:
+elementwise_max
+---------------
-sequence_softmax
-----------------
-.. autofunction:: paddle.v2.fluid.layers.sequence_softmax
+.. autofunction:: paddle.v2.fluid.layers.elementwise_max
:noindex:
+elementwise_min
+---------------
-reduce_sum
-----------
-.. autofunction:: paddle.v2.fluid.layers.reduce_sum
+.. autofunction:: paddle.v2.fluid.layers.elementwise_min
:noindex:
+elementwise_pow
+---------------
-reduce_mean
------------
-.. autofunction:: paddle.v2.fluid.layers.reduce_mean
+.. autofunction:: paddle.v2.fluid.layers.elementwise_pow
:noindex:
+clip
+----
-reduce_max
-----------
-.. autofunction:: paddle.v2.fluid.layers.reduce_max
+.. autofunction:: paddle.v2.fluid.layers.clip
:noindex:
+clip_by_norm
+------------
-reduce_min
-----------
-.. autofunction:: paddle.v2.fluid.layers.reduce_min
+.. autofunction:: paddle.v2.fluid.layers.clip_by_norm
:noindex:
+sequence_softmax
+----------------
-split
------
-.. autofunction:: paddle.v2.fluid.layers.split
+.. autofunction:: paddle.v2.fluid.layers.sequence_softmax
:noindex:
+sigmoid
+-------
-matmul
-------
-.. autofunction:: paddle.v2.fluid.layers.matmul
+.. autofunction:: paddle.v2.fluid.layers.sigmoid
:noindex:
logsigmoid
----------
+
.. autofunction:: paddle.v2.fluid.layers.logsigmoid
:noindex:
exp
---
+
.. autofunction:: paddle.v2.fluid.layers.exp
:noindex:
relu
----
+
.. autofunction:: paddle.v2.fluid.layers.relu
:noindex:
tanh
----
+
.. autofunction:: paddle.v2.fluid.layers.tanh
:noindex:
tanh_shrink
-----------
+
.. autofunction:: paddle.v2.fluid.layers.tanh_shrink
:noindex:
softshrink
----------
+
.. autofunction:: paddle.v2.fluid.layers.softshrink
:noindex:
sqrt
----
+
.. autofunction:: paddle.v2.fluid.layers.sqrt
:noindex:
abs
-----
+---
+
.. autofunction:: paddle.v2.fluid.layers.abs
:noindex:
ceil
----
+
.. autofunction:: paddle.v2.fluid.layers.ceil
:noindex:
floor
-----
+
.. autofunction:: paddle.v2.fluid.layers.floor
:noindex:
round
-----
+
.. autofunction:: paddle.v2.fluid.layers.round
:noindex:
reciprocal
----------
+
.. autofunction:: paddle.v2.fluid.layers.reciprocal
:noindex:
log
---
+
.. autofunction:: paddle.v2.fluid.layers.log
:noindex:
square
------
+
.. autofunction:: paddle.v2.fluid.layers.square
:noindex:
softplus
--------
+
.. autofunction:: paddle.v2.fluid.layers.softplus
:noindex:
softsign
----------
+--------
+
.. autofunction:: paddle.v2.fluid.layers.softsign
:noindex:
brelu
-----
+
.. autofunction:: paddle.v2.fluid.layers.brelu
:noindex:
leaky_relu
----------
+
.. autofunction:: paddle.v2.fluid.layers.leaky_relu
:noindex:
soft_relu
---------
+
.. autofunction:: paddle.v2.fluid.layers.soft_relu
:noindex:
elu
-----
+---
+
.. autofunction:: paddle.v2.fluid.layers.elu
:noindex:
relu6
-----
+
.. autofunction:: paddle.v2.fluid.layers.relu6
:noindex:
pow
-----
+---
+
.. autofunction:: paddle.v2.fluid.layers.pow
:noindex:
+stanh
+-----
+
+.. autofunction:: paddle.v2.fluid.layers.stanh
+ :noindex:
+
hard_shrink
-----------
+
.. autofunction:: paddle.v2.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
+
.. autofunction:: paddle.v2.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid
--------------
+------------
+
.. autofunction:: paddle.v2.fluid.layers.hard_sigmoid
:noindex:
swish
-------
+-----
+
.. autofunction:: paddle.v2.fluid.layers.swish
:noindex:
-im2sequence
+tensor
+======
+
+create_tensor
+-------------
+
+.. autofunction:: paddle.v2.fluid.layers.create_tensor
+ :noindex:
+
+create_parameter
+----------------
+
+.. autofunction:: paddle.v2.fluid.layers.create_parameter
+ :noindex:
+
+create_global_var
+-----------------
+
+.. autofunction:: paddle.v2.fluid.layers.create_global_var
+ :noindex:
+
+cast
+----
+
+.. autofunction:: paddle.v2.fluid.layers.cast
+ :noindex:
+
+concat
------
-.. autofunction:: paddle.v2.fluid.layers.im2sequence
+
+.. autofunction:: paddle.v2.fluid.layers.concat
:noindex:
-edit_distance
----------------
-.. autofunction:: paddle.v2.fluid.layers.edit_distance_error
+sums
+----
+
+.. autofunction:: paddle.v2.fluid.layers.sums
:noindex:
-ctc_greedy_decoder
----------------
-.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder
+assign
+------
+
+.. autofunction:: paddle.v2.fluid.layers.assign
:noindex:
-l2_normalize
-------------
-.. autofunction:: paddle.v2.fluid.layers.l2_normalize
+fill_constant_batch_size_like
+-----------------------------
+
+.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
:noindex:
-sequence_reshape
-----------------
-.. autofunction:: paddle.v2.fluid.layers.sequence_reshape
+fill_constant
+-------------
+
+.. autofunction:: paddle.v2.fluid.layers.fill_constant
:noindex:
-row_conv
---------
-.. autofunction:: paddle.v2.fluid.layers.row_conv
+ones
+----
+
+.. autofunction:: paddle.v2.fluid.layers.ones
:noindex:
-multiplex
----------
-.. autofunction:: paddle.v2.fluid.layers.multiplex
+zeros
+-----
+
+.. autofunction:: paddle.v2.fluid.layers.zeros
:noindex:
+
diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst
index 500019bc507f859c4c91de5d322a82eb1e78e2de..015581b7660848bdb0845fafe2d3fc05405e6ae6 100644
--- a/doc/api/v2/fluid/nets.rst
+++ b/doc/api/v2/fluid/nets.rst
@@ -1,33 +1,31 @@
-===========
-Nets
-===========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+====
+nets
+====
simple_img_conv_pool
--------------------
-.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
- :noindex:
-
-img_conv_group
----------------
-.. autofunction:: paddle.v2.fluid.nets.img_conv_group
+.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
:noindex:
-
sequence_conv_pool
------------------
+
.. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool
:noindex:
-
glu
---
+
.. autofunction:: paddle.v2.fluid.nets.glu
:noindex:
-
scaled_dot_product_attention
----------------------------
+
.. autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention
:noindex:
diff --git a/doc/api/v2/fluid/optimizer.rst b/doc/api/v2/fluid/optimizer.rst
index 19b4940f08de3e2f7dc177f2961e538946d10a78..1691ebb9a7cb16da96e04147d0adea322374f529 100644
--- a/doc/api/v2/fluid/optimizer.rst
+++ b/doc/api/v2/fluid/optimizer.rst
@@ -1,54 +1,49 @@
-===========
-Optimizer
-===========
-
-Optimizer
------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: Optimizer
- :noindex:
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+=========
+optimizer
+=========
-SGDOptimizer
------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: SGDOptimizer
- :noindex:
+SGD
+---
+.. autoclass:: paddle.v2.fluid.optimizer.SGD
+ :members:
+ :noindex:
+Momentum
+--------
-MomentumOptimizer
------------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: MomentumOptimizer
+.. autoclass:: paddle.v2.fluid.optimizer.Momentum
+ :members:
:noindex:
+Adagrad
+-------
-
-AdagradOptimizer
-----------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: AdagradOptimizer
+.. autoclass:: paddle.v2.fluid.optimizer.Adagrad
+ :members:
:noindex:
+Adam
+----
-AdamOptimizer
--------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: AdamOptimizer
+.. autoclass:: paddle.v2.fluid.optimizer.Adam
+ :members:
:noindex:
+Adamax
+------
-AdamaxOptimizer
------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: AdamaxOptimizer
+.. autoclass:: paddle.v2.fluid.optimizer.Adamax
+ :members:
:noindex:
+DecayedAdagrad
+--------------
-DecayedAdagradOptimizer
------------------------
-.. automodule:: paddle.v2.fluid.optimizer
- :members: DecayedAdagradOptimizer
+.. autoclass:: paddle.v2.fluid.optimizer.DecayedAdagrad
+ :members:
:noindex:
diff --git a/doc/api/v2/fluid/param_attr.rst b/doc/api/v2/fluid/param_attr.rst
index ca0c8af9e8c4f2271de7a131ad0d27c0e8635f50..8083d0d858dafcd275eaddb9b475875ee42ef724 100644
--- a/doc/api/v2/fluid/param_attr.rst
+++ b/doc/api/v2/fluid/param_attr.rst
@@ -1,11 +1,21 @@
-===========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+==========
+param_attr
+==========
+
ParamAttr
-===========
+---------
+.. autoclass:: paddle.v2.fluid.param_attr.ParamAttr
+ :members:
+ :noindex:
+WeightNormParamAttr
+-------------------
-ParamAttr
------------
-.. automodule:: paddle.v2.fluid.param_attr
- :members: ParamAttr
+.. autoclass:: paddle.v2.fluid.param_attr.WeightNormParamAttr
+ :members:
:noindex:
+
diff --git a/doc/api/v2/fluid/profiler.rst b/doc/api/v2/fluid/profiler.rst
index 7d4042d1f41c12c4a551ba6576559d612116872a..4a1ff7cb6976e0054f77428b699ea679aa91394f 100644
--- a/doc/api/v2/fluid/profiler.rst
+++ b/doc/api/v2/fluid/profiler.rst
@@ -1,10 +1,25 @@
-===========
-Profiler
-===========
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+========
+profiler
+========
+cuda_profiler
+-------------
-Profiler
------------
.. autofunction:: paddle.v2.fluid.profiler.cuda_profiler
:noindex:
+
+reset_profiler
+--------------
+
+.. autofunction:: paddle.v2.fluid.profiler.reset_profiler
+ :noindex:
+
+profiler
+--------
+
+.. autofunction:: paddle.v2.fluid.profiler.profiler
+ :noindex:
+
diff --git a/doc/api/v2/fluid/regularizer.rst b/doc/api/v2/fluid/regularizer.rst
index 868e225ed3d59e79aeb217fb88081ea25f80fa2c..2c17d15599baa1d02eb87c7b6c40034769ebb3a4 100644
--- a/doc/api/v2/fluid/regularizer.rst
+++ b/doc/api/v2/fluid/regularizer.rst
@@ -1,25 +1,27 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
===========
-Regularizer
+regularizer
===========
-WeightDecayRegularizer
-----------------------
-.. automodule:: paddle.v2.fluid.regularizer
- :members: WeightDecayRegularizer
- :noindex:
-
+append_regularization_ops
+-------------------------
-L2DecayRegularizer
-------------------
-.. automodule:: paddle.v2.fluid.regularizer
- :members: L2DecayRegularizer
+.. autofunction:: paddle.v2.fluid.regularizer.append_regularization_ops
:noindex:
+L1Decay
+-------
+.. autoclass:: paddle.v2.fluid.regularizer.L1Decay
+ :members:
+ :noindex:
-L1DecayRegularizer
--------------------
-.. automodule:: paddle.v2.fluid.regularizer
- :members: L1DecayRegularizer
+L2Decay
+-------
+.. autoclass:: paddle.v2.fluid.regularizer.L2Decay
+ :members:
+ :noindex:
diff --git a/doc/design/speech/README.MD b/doc/design/speech/deep_speech_2.md
similarity index 85%
rename from doc/design/speech/README.MD
rename to doc/design/speech/deep_speech_2.md
index 7304650e628dba210488cd2dc4836318b5383b2a..cfdc4d6df04344c70d3334626bd38eca997c31ff 100644
--- a/doc/design/speech/README.MD
+++ b/doc/design/speech/deep_speech_2.md
@@ -140,7 +140,19 @@ TODO by Assignees
### Beam Search with CTC and LM
-TODO by Assignees
+<div align="center">
+<img src="image/beam_search.png">
+</div>
+
+Figure 2. Algorithm for CTC Beam Search Decoder.
+
+- The **Beam Search Decoder** for the DS2 CTC-trained network follows a similar approach to \[[3](#references)\], as shown in Figure 2, with two important modifications for the ambiguous parts:
+ - 1) in the iterative computation of probabilities, the assignment operation is changed to accumulation, since one prefix may come from different paths (see the sketch below);
+ - 2) the if condition ```if l^+ not in A_prev then``` after the probability computation is dropped, since it is hard to understand and seems unnecessary.
+- An **external scorer** is passed into the decoder to evaluate a candidate prefix during decoding, whenever a whitespace is appended in English decoding or any character is appended in Mandarin decoding.
+- Such an external scorer consists of a language model, a word count, or any other custom scorers.
+- The **language model** is built in Task 5; its parameters should be carefully tuned to achieve the minimum WER/CER (cf. Task 7).
+- This decoder needs to run with **high efficiency**, both for convenient parameter tuning and for real-world speech recognition.
+
## Future Work
@@ -153,3 +165,4 @@ TODO by Assignees
1. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016.
2. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595.
+3. Awni Y. Hannun, etc. [First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs](https://arxiv.org/abs/1408.2873). arXiv:1408.2873
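
To make modification 1) concrete, below is a minimal C++ sketch of one prefix-extension step of the beam search (illustrative only, not the repository's decoder: it ignores CTC blank/repeat handling, and names such as `ExtendPrefixes` are hypothetical):

```cpp
#include <string>
#include <unordered_map>

// One step of prefix beam search: extend every surviving prefix by one
// character. Accumulation (+=) is essential because the same extended
// prefix can be reached from different paths; plain assignment (=) would
// keep only the last path's probability.
void ExtendPrefixes(const std::unordered_map<std::string, double>& prev,
                    const std::string& vocab,
                    const double* char_probs,  // P(char) at this time step
                    std::unordered_map<std::string, double>* next) {
  for (const auto& kv : prev) {
    for (size_t c = 0; c < vocab.size(); ++c) {
      const std::string extended = kv.first + vocab[c];
      (*next)[extended] += kv.second * char_probs[c];  // accumulate, not assign
    }
  }
}
```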
diff --git a/doc/design/speech/image/beam_search.png b/doc/design/speech/image/beam_search.png
new file mode 100644
index 0000000000000000000000000000000000000000..7f7e35f34223162d0f7f0ed97375909c43b830ae
Binary files /dev/null and b/doc/design/speech/image/beam_search.png differ
diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst
index 71904dc41ed0d946867d890cc585e1b88450ca8c..ff904b1022a41612c9680dce92d3fc2c69ad7e93 100644
--- a/doc/getstarted/build_and_install/build_from_source_cn.rst
+++ b/doc/getstarted/build_and_install/build_from_source_cn.rst
@@ -115,7 +115,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B
"WITH_AVX", "是否编译含有AVX指令集的PaddlePaddle二进制文件", "ON"
"WITH_PYTHON", "是否内嵌PYTHON解释器", "ON"
"WITH_STYLE_CHECK", "是否编译时进行代码风格检查", "ON"
- "WITH_TESTING", "是否开启单元测试", "ON"
+ "WITH_TESTING", "是否开启单元测试", "OFF"
"WITH_DOC", "是否编译中英文文档", "OFF"
"WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto"
"WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON"
diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst
index 27f73b2e2c029b41d514e1612912ed1c335605b6..718fb869c23a1f7be82c87c726282bded9dad516 100644
--- a/doc/getstarted/build_and_install/build_from_source_en.rst
+++ b/doc/getstarted/build_and_install/build_from_source_en.rst
@@ -126,7 +126,7 @@ You can add :code:`-D` argument to pass such options, like:
"WITH_AVX", "Build with AVX support", "ON"
"WITH_PYTHON", "Build with integrated Python interpreter", "ON"
"WITH_STYLE_CHECK", "Check code style when building", "ON"
- "WITH_TESTING", "Build unit tests", "ON"
+ "WITH_TESTING", "Build unit tests", "OFF"
"WITH_DOC", "Build documentations", "OFF"
"WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto"
"WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON"
diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst
index 98fada7bdb46f4dd2927d6f93bcbcebbe7d18604..79d214635a069a739060e0b79424729f6ff90387 100644
--- a/doc/getstarted/build_and_install/docker_install_cn.rst
+++ b/doc/getstarted/build_and_install/docker_install_cn.rst
@@ -95,6 +95,12 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note
docker run -p 8888:8888 paddlepaddle/book
+国内用户可以使用下面的镜像源来加速访问:
+
.. code-block:: bash
+
+ docker run -p 8888:8888 docker.paddlepaddlehub.com/book
+
然后在浏览器中输入以下网址:
.. code-block:: text
diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst
index b1d0890b4cdddb77114a80276130afd07c22d270..e0e0559fb858a093db96a9b4ec1c5a45d6c71a38 100644
--- a/doc/getstarted/build_and_install/docker_install_en.rst
+++ b/doc/getstarted/build_and_install/docker_install_en.rst
@@ -102,6 +102,12 @@ We provide a packaged book image, simply issue the command:
docker run -p 8888:8888 paddlepaddle/book
+For users in China, we provide a faster mirror:
+
.. code-block:: bash
+
+ docker run -p 8888:8888 docker.paddlepaddlehub.com/book
+
Then, copy and paste the address into your local browser:
.. code-block:: text
diff --git a/doc/howto/usage/cluster/cluster_train_cn.md b/doc/howto/usage/cluster/cluster_train_cn.md
index c2fc86687d7106aac7c74d6dd16bc229353cb7c1..0f3db59607fb6b43da01f5fdb46949087517ed6c 100644
--- a/doc/howto/usage/cluster/cluster_train_cn.md
+++ b/doc/howto/usage/cluster/cluster_train_cn.md
@@ -92,11 +92,11 @@ paddle.init(
参数说明
- use_gpu: **可选,默认False**,是否启用GPU训练
-- trainer_count:**必选,默认1**,当前训练任务trainer总个数
+- trainer_count:**必选,默认1**,当前trainer的线程数目
- port:**必选,默认7164**,连接到pserver的端口
- ports_num:**必选,默认1**,连接到pserver的端口个数
- ports_num_for_sparse:**必选,默认0**,和pserver之间用于稀疏类型参数通信的端口个数
-- num_gradient_servers:**必选,默认1**,当前训练任务pserver总数
+- num_gradient_servers:**必选,默认1**,当前训练任务trainer总数
- trainer_id:**必选,默认0**,每个trainer的唯一ID,从0开始的整数
- pservers:**必选,默认127.0.0.1**,当前训练任务启动的pserver的IP列表,多个IP使用“,”隔开
diff --git a/doc/howto/usage/cluster/cluster_train_en.md b/doc/howto/usage/cluster/cluster_train_en.md
index 28cd1fa7903e559e33a7fc2f00172fdfbe2fdc97..f9424f8f1a29fcf001c4e7976086512b22f6e858 100644
--- a/doc/howto/usage/cluster/cluster_train_en.md
+++ b/doc/howto/usage/cluster/cluster_train_en.md
@@ -95,11 +95,11 @@ paddle.init(
Parameter Description
- use_gpu: **optional, default False**, set to "True" to enable GPU training.
-- trainer_count: **required, default 1**, total count of trainers in the training job.
+- trainer_count: **required, default 1**, number of threads in the current trainer.
- port: **required, default 7164**, port to connect to parameter server.
- ports_num: **required, default 1**, number of ports for communication.
- ports_num_for_sparse: **required, default 0**, number of ports for sparse type calculation.
-- num_gradient_servers: **required, default 1**, total number of gradient server.
+- num_gradient_servers: **required, default 1**, number of trainers in the current job.
- trainer_id: **required, default 0**, ID for every trainer, start from 0.
- pservers: **required, default 127.0.0.1**, list of IPs of parameter servers, separated by ",".
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 318661af8bd04880577222fdc82cc1b6e79a457f..8b71f73c36c33d882b34c833031c50cd14817e76 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -22,11 +22,11 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory)
-nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
+nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init)
cc_test(variable_test SRCS variable_test.cc)
-cc_library(threadpool SRCS threadpool.cc)
+cc_library(threadpool SRCS threadpool.cc DEPS enforce)
cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(scope SRCS scope.cc DEPS glog threadpool)
diff --git a/paddle/framework/channel.h b/paddle/framework/channel.h
index 70ecccc1a1078374f3190b3956103ed8000c4fc5..0570980c5a4d7fa45e672ae5baac65d2c65ddad9 100644
--- a/paddle/framework/channel.h
+++ b/paddle/framework/channel.h
@@ -26,9 +26,7 @@ class Channel {
virtual void Send(T*) = 0;
virtual void Receive(T*) = 0;
virtual size_t Cap() = 0;
-
- // Don't delete channels; instead, call Channel::Close.
- protected:
+ virtual void Close() = 0;
virtual ~Channel() {}
};
@@ -50,11 +48,7 @@ Channel<T>* MakeChannel(size_t buffer_size) {
template <typename T>
void CloseChannel(Channel<T>* ch) {
- if (ch->Cap() > 0) {
- delete dynamic_cast<details::Buffered<T>*>(ch);
- } else {
- delete dynamic_cast<details::UnBuffered<T>*>(ch);
- }
+ ch->Close();
}
} // namespace framework
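
As a usage sketch of the interface after this change (illustrative, assuming the header above; the tests below exercise the same pattern): `CloseChannel` now delegates to the virtual `Close()`, and the caller keeps ownership of the channel object:

```cpp
#include <thread>
#include "paddle/framework/channel.h"

int main() {
  using paddle::framework::CloseChannel;
  using paddle::framework::MakeChannel;

  auto* ch = MakeChannel<int>(4);  // capacity > 0 selects the buffered channel
  std::thread producer([&] {
    for (int i = 0; i < 4; ++i) ch->Send(&i);  // fits in the buffer, no block
  });
  int value;
  for (int i = 0; i < 4; ++i) ch->Receive(&value);
  producer.join();
  CloseChannel(ch);  // now simply calls ch->Close()
  delete ch;         // the owner still deletes the object itself
}
```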
diff --git a/paddle/framework/channel_test.cc b/paddle/framework/channel_test.cc
index 9efc0172658c800d14102531332dbef68fa392f4..020f806380626d2f1efac683741ee84f1b573aeb 100644
--- a/paddle/framework/channel_test.cc
+++ b/paddle/framework/channel_test.cc
@@ -14,13 +14,114 @@ limitations under the License. */
#include "paddle/framework/channel.h"
+#include <chrono>
+#include <thread>
+
#include "gtest/gtest.h"
+using paddle::framework::Channel;
+using paddle::framework::MakeChannel;
+using paddle::framework::CloseChannel;
+
TEST(Channel, MakeAndClose) {
- using paddle::framework::Channel;
- using paddle::framework::MakeChannel;
- using paddle::framework::CloseChannel;
+ using paddle::framework::details::Buffered;
+ using paddle::framework::details::UnBuffered;
+ {
+ // MakeChannel should return a buffered channel if buffer_size > 0.
+ auto ch = MakeChannel<size_t>(10);
+ EXPECT_NE(dynamic_cast<Buffered<size_t>*>(ch), nullptr);
+ EXPECT_EQ(dynamic_cast<UnBuffered<size_t>*>(ch), nullptr);
+ CloseChannel(ch);
+ delete ch;
+ }
+ {
+ // MakeChannel should return an un-buffered channel if buffer_size == 0.
+ auto ch = MakeChannel<size_t>(0);
+ EXPECT_EQ(dynamic_cast<Buffered<size_t>*>(ch), nullptr);
+ EXPECT_NE(dynamic_cast<UnBuffered<size_t>*>(ch), nullptr);
+ CloseChannel(ch);
+ delete ch;
+ }
+}
+
+TEST(Channel, SufficientBufferSizeDoesntBlock) {
+ const size_t buffer_size = 10;
+ auto ch = MakeChannel<size_t>(buffer_size);
+ for (size_t i = 0; i < buffer_size; ++i) {
+ ch->Send(&i); // should not block
+ }
+
+ size_t out;
+ for (size_t i = 0; i < buffer_size; ++i) {
+ ch->Receive(&out); // should not block
+ EXPECT_EQ(out, i);
+ }
+ CloseChannel(ch);
+ delete ch;
+}
+
+TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
+ const size_t buffer_size = 10;
+ auto ch = MakeChannel<size_t>(buffer_size);
+ size_t sum = 0;
+ std::thread t([&]() {
+ // Try to write more than buffer size.
+ for (size_t i = 0; i < 2 * buffer_size; ++i) {
+ ch->Send(&i); // should not block
+ sum += i;
+ }
+ });
+ std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait 0.1 sec
+ EXPECT_EQ(sum, 45U);
+
+ CloseChannel(ch);
+ t.join();
+ delete ch;
+}
+
+TEST(Channel, SimpleUnbufferedChannelTest) {
+ auto ch = MakeChannel<int>(0);
+ unsigned sum_send = 0;
+ std::thread t([&]() {
+ for (int i = 0; i < 5; i++) {
+ ch->Send(&i);
+ sum_send += i;
+ }
+ });
+ for (int i = 0; i < 5; i++) {
+ int recv;
+ ch->Receive(&recv);
+ EXPECT_EQ(recv, i);
+ }
+
+ CloseChannel(ch);
+ t.join();
+ EXPECT_EQ(sum_send, 10U);
+ delete ch;
+}
+
+TEST(Channel, UnbufferedLessReceiveMoreSendTest) {
+ auto ch = MakeChannel<int>(0);
+ unsigned sum_send = 0;
+ // Send should block after three iterations
+ // since we only have three receivers.
+ std::thread t([&]() {
+ // Try to send more times than there are receive calls.
+ for (int i = 0; i < 4; i++) {
+ ch->Send(&i);
+ sum_send += i;
+ }
+ });
+ for (int i = 0; i < 3; i++) {
+ int recv;
+ ch->Receive(&recv);
+ EXPECT_EQ(recv, i);
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait 0.1 sec
+ EXPECT_EQ(sum_send, 3U);
- Channel<size_t>* ch = MakeChannel<size_t>(10);
CloseChannel(ch);
+ t.join();
+ delete ch;
}
diff --git a/paddle/framework/details/buffered_channel.h b/paddle/framework/details/buffered_channel.h
index 572e29d44a3baec84a029d87f9b0874784aa761b..b093e1589293b030ef2bedb82504a8e86b3dc857 100644
--- a/paddle/framework/details/buffered_channel.h
+++ b/paddle/framework/details/buffered_channel.h
@@ -18,6 +18,7 @@ limitations under the License. */
#include <mutex>
#include "paddle/framework/channel.h"
+#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
@@ -32,6 +33,8 @@ class Buffered : public paddle::framework::Channel<T> {
virtual void Send(T*);
virtual void Receive(T*);
virtual size_t Cap() { return cap_; }
+ virtual void Close();
+ virtual ~Buffered();
private:
size_t cap_;
@@ -39,9 +42,11 @@ class Buffered : public paddle::framework::Channel<T> {
std::condition_variable empty_cond_var_;
std::condition_variable full_cond_var_;
std::deque<T> channel_;
+ bool closed_;
- Buffered(size_t cap) : cap_(cap) {}
- virtual ~Buffered();
+ Buffered(size_t cap) : cap_(cap), closed_(false) {
+ PADDLE_ENFORCE_GT(cap, 0);
+ }
void NotifyAllSenders(std::unique_lock<std::mutex>*);
};
@@ -49,24 +54,39 @@ class Buffered : public paddle::framework::Channel<T> {
template <typename T>
void Buffered<T>::Send(T* item) {
std::unique_lock<std::mutex> lock(mu_);
- full_cond_var_.wait(lock, [this]() { return channel_.size() < cap_; });
- channel_.push_back(std::move(*item));
- lock.unlock();
- empty_cond_var_.notify_one();
+ full_cond_var_.wait(lock,
+ [this]() { return channel_.size() < cap_ || closed_; });
+ if (!closed_) {
+ channel_.push_back(std::move(*item));
+ lock.unlock();
+ empty_cond_var_.notify_one();
+ }
}
template <typename T>
void Buffered<T>::Receive(T* item) {
std::unique_lock<std::mutex> lock(mu_);
- empty_cond_var_.wait(lock, [this]() { return !channel_.empty(); });
- *item = std::move(channel_.front());
- channel_.pop_front();
+ empty_cond_var_.wait(lock, [this]() { return !channel_.empty() || closed_; });
+ if (!closed_) {
+ *item = std::move(channel_.front());
+ channel_.pop_front();
+ NotifyAllSenders(&lock);
+ } else {
+ item = nullptr;
+ }
+}
+
+template <typename T>
+void Buffered<T>::Close() {
+ std::unique_lock<std::mutex> lock(mu_);
+ closed_ = true;
NotifyAllSenders(&lock);
}
template <typename T>
Buffered<T>::~Buffered() {
std::unique_lock<std::mutex> lock(mu_);
+ closed_ = true;
channel_.clear();
NotifyAllSenders(&lock);
}
@@ -74,7 +94,7 @@ Buffered<T>::~Buffered() {
template <typename T>
void Buffered<T>::NotifyAllSenders(std::unique_lock<std::mutex>* lock) {
lock->unlock();
- full_cond_var_.notify_one();
+ full_cond_var_.notify_all();
}
} // namespace details
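
A small sketch of the shutdown behavior implemented above (assuming these semantics: a sender blocked on a full buffer is woken by `Close()` and returns without enqueueing):

```cpp
#include <thread>
#include "paddle/framework/channel.h"

int main() {
  using paddle::framework::CloseChannel;
  using paddle::framework::MakeChannel;

  auto* ch = MakeChannel<int>(1);
  int v = 42;
  ch->Send(&v);  // fills the single-slot buffer
  std::thread blocked([&] {
    int w = 7;
    ch->Send(&w);  // blocks: buffer is full and nobody receives
  });
  CloseChannel(ch);  // sets closed_ and notifies all senders;
                     // the blocked Send wakes up and returns without pushing
  blocked.join();
  delete ch;
}
```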
diff --git a/paddle/framework/details/unbuffered_channel.h b/paddle/framework/details/unbuffered_channel.h
index 7ecced1fba88fea781fc342091bc71e5aa496d3a..0dc5afd7e57c1f59dfc1b86093eea231d46966f1 100644
--- a/paddle/framework/details/unbuffered_channel.h
+++ b/paddle/framework/details/unbuffered_channel.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include <atomic>
#include <condition_variable>
-#include <deque>
#include <mutex>
#include "paddle/framework/channel.h"
@@ -32,20 +32,108 @@ class UnBuffered : public paddle::framework::Channel<T> {
virtual void Send(T*);
virtual void Receive(T*);
virtual size_t Cap() { return 0; }
+ virtual void Close();
+ virtual ~UnBuffered();
private:
- UnBuffered() {}
- virtual ~UnBuffered();
+ std::mutex mu_ch_;
+ // Mutex for readers and writers who are waiting for other reader
+ // and writer to complete execution
+ std::recursive_mutex mu_read_, mu_write_;
+ // reader_found_ is set true when a reader is ready to accept data
+ // writer_found_ is set true when a writer is ready to send data
+ // A transaction occurs only when both are true
+ std::atomic<bool> reader_found_{false}, writer_found_{false};
+ std::condition_variable cv_channel_;
+ std::condition_variable_any cv_reader_, cv_writer_;
+ T* item{nullptr};
+ std::atomic<bool> closed_{false};
+
+ UnBuffered() : closed_(false) {}
+
+ void NotifyAllParticipants(std::unique_lock<std::mutex>*);
};
+// This function implements the concept of how data should
+// be sent from a writer to a reader.
+template <typename T>
+void UnBuffered<T>::Send(T* data) {
+ // Prevent other writers from entering
+ std::unique_lock<std::recursive_mutex> writer_lock(mu_write_);
+ writer_found_ = true;
+ std::unique_lock<std::recursive_mutex> cv_lock(mu_write_);
+ // If writer comes first, it should wait till a reader arrives
+ cv_writer_.wait(cv_lock,
+ [this]() { return reader_found_ == true || closed_; });
+ cv_reader_.notify_one();
+ if (!closed_) {
+ std::unique_lock<std::mutex> channel_lock(mu_ch_);
+ item = data;
+ channel_lock.unlock();
+ cv_channel_.notify_one();
+ channel_lock.lock();
+ cv_channel_.wait(channel_lock,
+ [this]() { return item == nullptr || closed_; });
+ }
+ writer_found_ = false;
+}
+
+// This function implements the concept of how
+// data that was sent by a writer is read by a reader.
+template <typename T>
+void UnBuffered<T>::Receive(T* data) {
+ // Prevent other readers from entering
+ std::unique_lock<std::recursive_mutex> read_lock{mu_read_};
+ reader_found_ = true;
+ std::unique_lock<std::recursive_mutex> cv_lock{mu_read_};
+ // If reader comes first, it should wait till a writer arrives
+ cv_reader_.wait(cv_lock,
+ [this]() { return writer_found_ == true || closed_; });
+ cv_writer_.notify_one();
+ if (!closed_) {
+ std::unique_lock<std::mutex> lock_ch{mu_ch_};
+ // Reader should wait for the writer to first write its data
+ cv_channel_.wait(lock_ch, [this]() { return item != nullptr || closed_; });
+ if (!closed_) {
+ *data = std::move(*item);
+ item = nullptr;
+ lock_ch.unlock();
+ }
+ cv_channel_.notify_one();
+ }
+ reader_found_ = false;
+}
+
+// This function implements the sequence of events
+// that take place once the channel is closed.
template <typename T>
-void UnBuffered<T>::Send(T* channel_element) {}
+void UnBuffered<T>::Close() {
+ std::unique_lock<std::mutex> lock(mu_ch_);
+ item = nullptr;
+ closed_ = true;
+ NotifyAllParticipants(&lock);
+}
+// This function implements the sequence of events
+// that are executed once the object of an UnBuffered
+// channel is destroyed.
template <typename T>
-void UnBuffered<T>::Receive(T*) {}
+UnBuffered<T>::~UnBuffered() {
+ std::unique_lock<std::mutex> lock(mu_ch_);
+ item = nullptr;
+ closed_ = true;
+ NotifyAllParticipants(&lock);
+}
+// This function notifies all the readers, writers and
+// the channel condition variables.
template <typename T>
-UnBuffered<T>::~UnBuffered() {}
+void UnBuffered<T>::NotifyAllParticipants(std::unique_lock<std::mutex>* lock) {
+ lock->unlock();
+ cv_writer_.notify_all();
+ cv_channel_.notify_all();
+ cv_reader_.notify_all();
+}
} // namespace details
} // namespace framework
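
For the unbuffered case, a rendezvous sketch (illustrative; it mirrors `SimpleUnbufferedChannelTest` from the test file earlier in this diff): with zero capacity, each `Send` blocks until a matching `Receive` arrives, so the two threads proceed in lock step:

```cpp
#include <thread>
#include "paddle/framework/channel.h"

int main() {
  using paddle::framework::CloseChannel;
  using paddle::framework::MakeChannel;

  auto* ch = MakeChannel<int>(0);  // capacity 0 selects UnBuffered<int>
  std::thread sender([&] {
    for (int i = 0; i < 3; ++i) ch->Send(&i);  // each Send waits for a reader
  });
  int v;
  for (int i = 0; i < 3; ++i) ch->Receive(&v);  // pairs with one Send each
  sender.join();
  CloseChannel(ch);
  delete ch;
}
```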
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index cbf3ec75265fa74aaacffee684b7b7d5f73b7c02..9a232b08434d299d10bb2acdb6e96295de875d56 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -25,7 +25,7 @@ limitations under the License. */
#include "paddle/platform/place.h"
#include "paddle/platform/profiler.h"
-DECLARE_bool(do_memory_benchmark);
+DECLARE_bool(benchmark);
DEFINE_bool(check_nan_inf, false,
"Checking whether operator produce NAN/INF or not. It will be "
"extremely slow so please use this flag wisely.");
@@ -33,9 +33,6 @@ DEFINE_bool(check_nan_inf, false,
namespace paddle {
namespace framework {
-const std::string kFeedOpType = "feed";
-const std::string kFetchOpType = "fetch";
-
Executor::Executor(const platform::Place& place) : place_(place) {}
static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
@@ -125,7 +122,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
op->Run(*local_scope, place_);
VLOG(3) << op->DebugStringEx(local_scope);
- if (FLAGS_do_memory_benchmark) {
+ if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
}
@@ -142,7 +139,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
if (create_vars && create_local_scope) {
scope->DeleteScope(local_scope);
}
- if (FLAGS_do_memory_benchmark) {
+ if (FLAGS_benchmark) {
VLOG(2) << "-------------------------------------------------------";
VLOG(2) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_);
diff --git a/paddle/framework/feed_fetch_type.h b/paddle/framework/feed_fetch_type.h
index 9bc4a90c44828ecb7458d524f59609f01848cc5c..168f456675af508df86dd0520cdeb5d16d94ad31 100644
--- a/paddle/framework/feed_fetch_type.h
+++ b/paddle/framework/feed_fetch_type.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include <string>
#include <vector>
#include "paddle/framework/lod_tensor.h"
@@ -20,5 +21,8 @@ namespace paddle {
namespace framework {
using FeedFetchType = LoDTensor;
using FeedFetchList = std::vector<FeedFetchType>;
+
+static const std::string kFeedOpType = "feed";
+static const std::string kFetchOpType = "fetch";
} // namespace framework
} // namespace paddle
diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc
index 4ef82a541efaa35bcf831d5122570154f2fa2423..3f6ea121b3994979d89a7d5a8c20c59240a0c111 100644
--- a/paddle/framework/init.cc
+++ b/paddle/framework/init.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <string.h> // for strdup
#include <algorithm>
+#include <stdexcept>
#include <string>
#include "paddle/framework/init.h"
@@ -46,17 +47,23 @@ void InitDevices() {
std::vector<platform::Place> places;
places.emplace_back(platform::CPUPlace());
+ int count = 0;
#ifdef PADDLE_WITH_CUDA
- int count = platform::GetCUDADeviceCount();
- for (int i = 0; i < count; ++i) {
- places.emplace_back(platform::CUDAPlace(i));
+ try {
+ count = platform::GetCUDADeviceCount();
+ } catch (const std::exception &exp) {
+ LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found at runtime.";
}
#else
LOG(WARNING)
- << "'GPU' is not supported, Please re-compile with WITH_GPU option";
+ << "'CUDA' is not supported, Please re-compile with WITH_GPU option";
#endif
+ for (int i = 0; i < count; ++i) {
+ places.emplace_back(platform::CUDAPlace(i));
+ }
+
platform::DeviceContextPool::Init(places);
}
diff --git a/paddle/framework/init_test.cc b/paddle/framework/init_test.cc
index f837a965d3be7d40c20803ae4462b3bfd91bffd0..01e076dd8ea24831e3ed7c8a7f8fae6818a89335 100644
--- a/paddle/framework/init_test.cc
+++ b/paddle/framework/init_test.cc
@@ -20,7 +20,21 @@ TEST(InitDevices, CPU) {
using paddle::framework::InitDevices;
using paddle::platform::DeviceContextPool;
+#ifndef PADDLE_WITH_CUDA
InitDevices();
DeviceContextPool& pool = DeviceContextPool::Instance();
- ASSERT_GE(pool.size(), 1U);
+ ASSERT_EQ(pool.size(), 1U);
+#endif
+}
+
+TEST(InitDevices, CUDA) {
+ using paddle::framework::InitDevices;
+ using paddle::platform::DeviceContextPool;
+
+#ifdef PADDLE_WITH_CUDA
+ int count = paddle::platform::GetCUDADeviceCount();
+ InitDevices();
+ DeviceContextPool& pool = DeviceContextPool::Instance();
+  ASSERT_EQ(pool.size(), 1U + static_cast<unsigned>(count));
+#endif
}
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 53b0d0fe083579da4f0bb600f292765aa2aa0d8a..cb27de6991674247e6215ce64a2da5000fa78ed4 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -24,8 +24,6 @@ limitations under the License. */
#include <stdint.h>
#include <string.h>
-#include <algorithm>
-
namespace paddle {
namespace framework {
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 9d1294fdeb9bd76bf944f7ec3687e3c5bb333241..d0ab640485baf6d76ee629ea420b603f42b031b4 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -18,11 +18,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
#endif
#include <glog/logging.h>
#include "paddle/framework/ddim.h"
+#include "paddle/framework/mixed_vector.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor_util.h"
#include "paddle/platform/enforce.h"
@@ -31,15 +31,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-#ifndef PADDLE_WITH_CUDA
-template <typename T>
-using Vector = std::vector<T>;
-#else
-template <typename T>
-using Vector = thrust::host_vector<
-    T, thrust::system::cuda::experimental::pinned_allocator<T>>;
-#endif
-
/*
* LoD is short for Level of Details.
*
@@ -55,7 +46,15 @@ using Vector = thrust::host_vector<
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
-using LoD = std::vector<Vector<size_t>>;
+struct LoD : public std::vector<Vector<size_t>> {
+  using std::vector<Vector<size_t>>::vector;
+
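+  /* Pull every level's device buffer back to its host side; a level that
+   * never went to the GPU just logs a warning and is left unchanged. */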
+ void CopyFromCUDA() {
+ for (auto it = this->begin(); it != this->end(); ++it) {
+ it->CopyFromCUDA();
+ }
+ }
+};
std::ostream& operator<<(std::ostream& os, const LoD& lod);
std::ostream& operator<<(std::ostream& os, const LoDTensor& t);
@@ -109,7 +108,10 @@ bool CheckAbsLoD(const LoD& in, int tensor_height = -1);
*/
class LoDTensor : public Tensor {
public:
- LoDTensor() {}
+ LoDTensor() : Tensor() {}
+
+ /* Constructor with place should only be used in pybind */
+ explicit LoDTensor(const platform::Place& place) : Tensor(place) {}
explicit LoDTensor(const LoD& lod) : lod_(lod) {}
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index 4d172c43c7cceacb7d0dfaf1c4d3028717350268..3b63020e685436396071fa05cd7697630ae56c95 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -23,6 +23,17 @@
namespace paddle {
namespace framework {
+TEST(LoD, data) {
+ LoD lod{{0, 1, 2}};
+ lod.push_back({0, 2, 4, 5});
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
+
+ auto& v = lod[0];
+ for (size_t i = 0; i < v.size(); ++i) {
+ EXPECT_EQ(v[i], i);
+ }
+}
+
TEST(LodExpand, test) {
LoD lod{{0, 2}};
LoDTensor tensor;
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
index 1e253a2f6f35e827fb2e5db6270da03705b39514..d4c9f00bd9c00f3cae68858ca46c5320fc117405 100644
--- a/paddle/framework/lod_tensor_test.cu
+++ b/paddle/framework/lod_tensor_test.cu
@@ -14,6 +14,8 @@
#include <cuda.h>
#include <cuda_runtime.h>
+#include <stdio.h>
+#include "paddle/framework/init.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/platform/assert.h"
@@ -26,7 +28,48 @@ __global__ void test(size_t* a, int size) {
}
}
+TEST(Vector, Normal) {
+ using namespace paddle::framework;
+ using namespace paddle::platform;
+ using namespace paddle::memory;
+
+ paddle::framework::InitDevices();
+
+  paddle::framework::Vector<size_t> vec({1, 2, 3});
+ size_t* ptr = vec.data();
+ for (size_t i = 0; i < vec.size(); ++i) {
+ EXPECT_EQ(vec[i], *(ptr + i));
+ }
+
+ vec.clear();
+ vec.CopyFromCUDA();
+
+  std::vector<size_t> v = {1, 2, 3};
+ for (size_t i = 0; i < v.size(); ++i) {
+ EXPECT_EQ(v[i], vec[i]);
+ }
+}
+
+TEST(LoD, data) {
+ paddle::framework::InitDevices();
+
+ paddle::framework::LoD lod{{0, 1, 2}};
+ lod.push_back({0, 2, 4, 5});
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
+
+ auto& v = lod[0];
+ test<<<1, 1>>>(v.cuda_data(), v.size());
+ cudaDeviceSynchronize();
+
+ v.CopyFromCUDA();
+ for (size_t i = 0; i < v.size(); ++i) {
+ EXPECT_EQ(v[i], i * 2);
+ }
+}
+
TEST(LoDTensor, LoDInGPU) {
+ paddle::framework::InitDevices();
+
paddle::framework::LoDTensor lod_tensor;
paddle::platform::CUDAPlace place(0);
@@ -42,8 +85,9 @@ TEST(LoDTensor, LoDInGPU) {
auto lod = lod_tensor.lod();
- test<<<1, 8>>>(lod[0].data(), lod[0].size());
+ test<<<1, 8>>>(lod[0].cuda_data(), lod[0].size());
cudaDeviceSynchronize();
+ lod.CopyFromCUDA();
for (size_t i = 0; i < src_lod[0].size(); ++i) {
EXPECT_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2);
diff --git a/paddle/framework/mixed_vector.h b/paddle/framework/mixed_vector.h
new file mode 100644
index 0000000000000000000000000000000000000000..85caac8dcd9ede4fe997e2fd246d1421aa73c80a
--- /dev/null
+++ b/paddle/framework/mixed_vector.h
@@ -0,0 +1,135 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#pragma once
+
+#include <initializer_list>
+#include <vector>
+
+#include "paddle/memory/memcpy.h"
+#include "paddle/memory/memory.h"
+#include "paddle/platform/device_context.h"
+#include "paddle/platform/enforce.h"
+#include "paddle/platform/place.h"
+
+namespace paddle {
+namespace framework {
+
+/**
+ * @brief Vector supports both CPU and GPU.
+ * The host vector's lifetime is tied to the Vector object itself;
+ * the device vector is lazily allocated and synchronized on demand.
+ */
+
+template <typename T>
+class Vector : public std::vector<T> {
+ public:
+  using std::vector<T>::vector;
+
+  Vector() {}
+  Vector(const std::vector<T> &v) : std::vector<T>(v) {}  // NOLINT
+
+ virtual ~Vector() {
+#ifdef PADDLE_WITH_CUDA
+ if (cuda_ptr_ != nullptr) {
+ memory::Free(place_, cuda_ptr_);
+ }
+#endif
+ }
+
+ /* Get device vector */
+ T *cuda_data() {
+ CopyToCUDA();
+ PADDLE_ENFORCE_NOT_NULL(
+        cuda_ptr_, "No data, or insufficient CUDA memory for allocation");
+    return static_cast<T *>(cuda_ptr_);
+ }
+
+ /* Get host vector */
+  T *data() { return std::vector<T>::data(); }
+  const T *data() const { return std::vector<T>::data(); }
+
+ /* Synchronize host vector to device vector */
+ void CopyToCUDA();
+ /* Synchronize device vector to host vector */
+ void CopyFromCUDA();
+ /* Switch device vector location */
+ void CopyToPeer(platform::Place);
+
+ private:
+ void *cuda_ptr_ = nullptr;
+ size_t cuda_size_ = 0; // device vector numel
+ platform::CUDAPlace place_;
+};
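+
+// A minimal usage sketch (illustrative only):
+//   Vector<size_t> vec({1, 2, 3});
+//   size_t *d_ptr = vec.cuda_data();  // lazily allocates and copies to GPU
+//   ... launch a kernel that writes through d_ptr ...
+//   vec.CopyFromCUDA();               // bring the device-side edits back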
+
+template <typename T>
+void Vector<T>::CopyToCUDA() {
+#ifdef PADDLE_WITH_CUDA
+ if (cuda_size_ < this->size()) {
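+    // Reallocate only when the device buffer is too small for the host data.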
+ if (cuda_ptr_ != nullptr) {
+ memory::Free(place_, cuda_ptr_);
+ }
+    cuda_ptr_ =
+        memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T));
+ }
+ cuda_size_ = this->size();
+ platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+ auto *ctx = pool.GetByPlace(place_);
+  memory::Copy(place_, cuda_ptr_, platform::CPUPlace(),
+               static_cast<const void *>(this->data()),
+ this->size() * sizeof(T), ctx->stream());
+ ctx->Wait();
+#endif
+}
+
+template <typename T>
+void Vector<T>::CopyFromCUDA() {
+#ifdef PADDLE_WITH_CUDA
+ if (cuda_ptr_ == nullptr) {
+    LOG(WARNING) << "No CUDA data to copy back; the device buffer is empty.";
+ return;
+ }
+ this->resize(cuda_size_);
+ platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+ auto *ctx = pool.GetByPlace(place_);
+  memory::Copy(platform::CPUPlace(), static_cast<void *>(this->data()), place_,
+               static_cast<const void *>(cuda_ptr_), this->size() * sizeof(T),
+ ctx->stream());
+ ctx->Wait();
+#endif
+}
+
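+/* Move the device buffer to another CUDA place; host contents are untouched. */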
+template <typename T>
+void Vector<T>::CopyToPeer(platform::Place peer_place) {
+#ifdef PADDLE_WITH_CUDA
+ auto *ctx = platform::DeviceContextPool::Instance().GetByPlace(place_);
+  void *peer_cuda_ptr = memory::Alloc<platform::CUDAPlace>(
+      boost::get<platform::CUDAPlace>(peer_place), this->size() * sizeof(T));
+  memory::Copy(boost::get<platform::CUDAPlace>(peer_place), peer_cuda_ptr,
+ place_, cuda_ptr_, this->size() * sizeof(T), ctx->stream());
+ ctx->Wait();
+
+ memory::Free(place_, cuda_ptr_);
+  place_ = boost::get<platform::CUDAPlace>(peer_place);
+ cuda_ptr_ = peer_cuda_ptr;
+#endif
+}
+
+template class Vector<int>;
+template class Vector<unsigned>;
+template class Vector<size_t>;
+template class Vector<int64_t>;
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 831b1e2a1e10777d9e89364adcd4b1f367e86080..4e854f54dd43d760bab44fb5f7cafeb13314b27c 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -22,9 +22,7 @@ limitations under the License. */
#include "paddle/framework/shape_inference.h"
#include "paddle/framework/var_type.h"
-DEFINE_bool(op_sync, false,
- "Default cuda is asynchronous device, set to True will"
- "force op run in synchronous mode.");
+DECLARE_bool(benchmark);
namespace paddle {
namespace framework {
@@ -531,7 +529,7 @@ void OperatorWithKernel::Run(const Scope& scope,
ExecutionContext(*this, new_scope, *new_dev_ctx));
/*For profiling/benchmark only*/
- if (FLAGS_op_sync) {
+ if (FLAGS_benchmark) {
new_dev_ctx->Wait();
}
}
diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc
index b5d9e5e385c1ba57169ef885824fc23b0f130692..15ea4035c6e6193105b621210a900e74d1466941 100644
--- a/paddle/framework/program_desc.cc
+++ b/paddle/framework/program_desc.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/framework/program_desc.h"
#include "paddle/framework/block_desc.h"
+#include "paddle/framework/feed_fetch_type.h"
namespace paddle {
namespace framework {
@@ -64,5 +65,27 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) {
}
}
+const std::vector<std::string> ProgramDesc::GetFeedTargetNames() {
+  BlockDesc *global_block = blocks_[0].get();
+  std::vector<std::string> feed_target_names;
+ for (auto *op : global_block->AllOps()) {
+ if (op->Type() == kFeedOpType) {
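+      // Feed ops were prepended to the block one by one, so they are stored
+      // in reverse; inserting at the front restores the original column order.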
+ feed_target_names.insert(feed_target_names.begin(), op->Output("Out")[0]);
+ }
+ }
+ return feed_target_names;
+}
+
+const std::vector<std::string> ProgramDesc::GetFetchTargetNames() {
+  BlockDesc *global_block = blocks_[0].get();
+  std::vector<std::string> fetch_target_names;
+ for (auto *op : global_block->AllOps()) {
+ if (op->Type() == kFetchOpType) {
+ fetch_target_names.push_back(op->Input("X")[0]);
+ }
+ }
+ return fetch_target_names;
+}
+
} // namespace framework
} // namespace paddle
diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h
index 15a962bb696d6172acd1a83cf9bb1ffd0846d449..8e958eab6ee08436ca73b13bac010e66c7df2b8b 100644
--- a/paddle/framework/program_desc.h
+++ b/paddle/framework/program_desc.h
@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory>
#include <vector>
+#include "paddle/framework/block_desc.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/platform/macros.h"
@@ -45,6 +46,9 @@ class ProgramDesc {
proto::ProgramDesc *Proto();
+  const std::vector<std::string> GetFeedTargetNames();
+  const std::vector<std::string> GetFetchTargetNames();
+
private:
proto::ProgramDesc desc_;
diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc
index 25eb813ffb96e9b1e13299421ead9f85c02da59f..bff8e0bceaca9749101b2c45edddba526d565624 100644
--- a/paddle/framework/prune.cc
+++ b/paddle/framework/prune.cc
@@ -17,6 +17,7 @@ limitations under the License. */
#include <algorithm>
#include <set>
#include <string>
+#include <unordered_map>
#include <vector>
#include <glog/logging.h>
@@ -102,6 +103,32 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
*op_field->Add() = input.blocks(block_id).ops(i);
}
}
+
+ // remove the VarDescs in BlockDesc that are not referenced in
+ // the pruned OpDescs
+  std::unordered_map<std::string, proto::VarDesc> var_map;
+ auto* var_field = output->mutable_blocks(block_id)->mutable_vars();
+ for (const auto& var : *var_field) {
+ var_map[var.name()] = var;
+ }
+
+ var_field->Clear();
+ for (const auto& op : *op_field) {
+ // add VarDescs of all input arguments for each OpDesc
+ auto& input_field = op.inputs();
+ for (auto& input_var : input_field) {
+ for (auto& arg : input_var.arguments()) {
+ *var_field->Add() = var_map[arg];
+ }
+ }
+ // add VarDescs of all output arguments for each OpDesc
+ auto& output_field = op.outputs();
+ for (auto& output_var : output_field) {
+ for (auto& arg : output_var.arguments()) {
+ *var_field->Add() = var_map[arg];
+ }
+ }
+ }
}
// TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index a67ff910093d93060d07d849f6e968e5f4ce21cd..af08b2ab816f63c05d4c65df9601c787e57994f5 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -20,9 +20,11 @@ limitations under the License. */
#include "paddle/framework/threadpool.h"
#include "paddle/string/printf.h"
-DEFINE_bool(do_memory_benchmark, false,
+DEFINE_bool(benchmark, false,
"Doing memory benchmark. It will make deleting scope synchronized, "
- "and add some memory usage logs");
+            "and add some memory usage logs. "
+            "CUDA is an asynchronous device by default; setting this flag to "
+            "true forces ops to run synchronously.");
namespace paddle {
namespace framework {
@@ -93,7 +95,7 @@ void Scope::DeleteScope(Scope* scope) {
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
this->kids_.erase(it);
// When benchmarking memory on Fluid, the scope must be deleted synchronously.
- if (FLAGS_do_memory_benchmark) {
+ if (FLAGS_benchmark) {
delete scope;
} else {
Async([scope] { delete scope; });
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 4aaa29d794c95592832a1fe990e2dce274eba9d5..f0ea709a5c37e769e3ffa1b2e9d1e39721979251 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -47,6 +47,11 @@ class Tensor {
public:
Tensor() : offset_(0) {}
+ /*! Constructor with place should only be used in pybind. */
+ explicit Tensor(const platform::Place& place) : offset_(0) {
+ holder_->set_place(place);
+ }
+
/*! Return a pointer to mutable memory block. */
template <typename T>
inline T* data();
@@ -137,6 +142,7 @@ class Tensor {
virtual std::type_index type() const = 0;
virtual platform::Place place() const = 0;
virtual void set_type(std::type_index type) = 0;
+ virtual void set_place(platform::Place place) = 0;
};
template <typename Place>
@@ -156,6 +162,7 @@ class Tensor {
virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; }
+ virtual void set_place(platform::Place place) { place_ = place; }
/*! the pointer of memory block. */
std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t, Place>> ptr_;
diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc
index b2f5ae4a96593fde1623dd10d3b63c984ae228db..b7d7c00bcf9d9770f58284023ca2defcda299d64 100644
--- a/paddle/framework/threadpool.cc
+++ b/paddle/framework/threadpool.cc
@@ -14,6 +14,8 @@
#include "paddle/framework/threadpool.h"
+#include "paddle/platform/enforce.h"
+
namespace paddle {
namespace framework {
diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h
index 8912b1a43a26f9df662d3b5ddf68bfb2b87f4a20..4e9b58679d9e7c84adf76b6245b397c7a8872483 100644
--- a/paddle/framework/threadpool.h
+++ b/paddle/framework/threadpool.h
@@ -22,7 +22,7 @@ limitations under the License. */
#include <thread>
#include <vector>
-#include "paddle/platform/enforce.h"
+#include "paddle/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN
namespace paddle {
namespace framework {
diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp
index cbdbf5335d32d55a0221728758025c9d2cb3e7d1..a9876cec2aabf7d116443b685391ee9d20bc1370 100644
--- a/paddle/function/GemmConvOp.cpp
+++ b/paddle/function/GemmConvOp.cpp
@@ -178,19 +178,22 @@ public:
real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>();
+ real* colData = NULL;
bool needIm2col = isNeedIm2col(filter);
TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth});
-
TensorShape colShape;
- real* colData = NULL;
- size_t colHeight = inputChannels / groups_ * filterHeight * filterWidth;
- size_t colWidth = outputHeight * outputWidth;
- // Max col matrix height 256, Max col matrix width 1024
-  size_t stepColHeight = std::min(colHeight, static_cast<size_t>(256));
-  size_t stepColWidth = std::min(colWidth, static_cast<size_t>(2048));
+ // Max col matrix width 4096, Max col matrix size 4M.
+ size_t outputHeightSteps =
+ std::min(std::max(4096 / outputWidth, (size_t)1), outputHeight);
+ size_t maxColWidth = outputHeightSteps * outputWidth;
+  size_t channelSteps =
+      std::min(std::max(1048576 / maxColWidth / (filterHeight * filterWidth),
+                        (size_t)1),
+               inputChannels / groups_);
+ size_t maxColHeight = channelSteps * filterHeight * filterWidth;
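+  // e.g. outputWidth = 64: outputHeightSteps <= 64, so maxColWidth <= 4096;
+  // channelSteps then caps maxColHeight so that the whole col tile stays
+  // within roughly 1M elements (4MB of float data).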
if (needIm2col) {
colShape = TensorShape({inputChannels / groups_,
@@ -199,7 +202,7 @@ public:
outputHeight,
outputWidth});
-    resizeBuffer<Device>(stepColHeight * stepColWidth * sizeof(real));
+    resizeBuffer<Device>(maxColHeight * maxColWidth * sizeof(real));
colData = reinterpret_cast(memory_->getBuf());
}
@@ -209,20 +212,24 @@ public:
(outputChannels / groups_) * outputHeight * outputWidth;
size_t filterOffset = filter.getElements() / groups_;
- int nStride = colWidth;
- int kStride = colHeight;
+ int nStride = outputHeight * outputWidth;
+ int kStride = inputChannels / groups_ * filterHeight * filterWidth;
for (size_t i = 0; i < batchSize; i++) {
+      filterData = inputs[1].data<real>();
for (size_t g = 0; g < groups_; g++) {
if (needIm2col) {
real beta_ = beta;
- for (size_t colHeightStart = 0; colHeightStart < colHeight;
- colHeightStart += stepColHeight) {
- for (size_t colWidthStart = 0; colWidthStart < colWidth;
- colWidthStart += stepColWidth) {
- int N = std::min(colWidth - colWidthStart, stepColWidth);
- int K = std::min(colHeight - colHeightStart, stepColHeight);
+ for (size_t ic = 0; ic < inputChannels / groups_;
+ ic += channelSteps) {
+ int channels = std::min(inputChannels / groups_ - ic, channelSteps);
+ for (size_t oh = 0; oh < outputHeight; oh += outputHeightSteps) {
+ int height = std::min(outputHeight - oh, outputHeightSteps);
+
+ int M = outputChannels / groups_;
+ int N = height * outputWidth;
+ int K = channels * filterHeight * filterWidth;
// im2col
- im2col(inputData + g * inputOffset,
+          im2col(inputData + ic * inputHeight * inputWidth,
imShape,
colData,
colShape,
@@ -232,13 +239,12 @@ public:
paddingW(),
dilationH(),
dilationW(),
- colHeightStart,
- K,
- colWidthStart,
+ channels,
+ oh,
+ height,
N);
// gemm
- int M = outputChannels / groups_;
BlasGemm<Device, real>::compute(
false,
false,
@@ -246,12 +252,12 @@ public:
N,
K,
1.0f,
- filterData + g * filterOffset + colHeightStart,
+ filterData + ic * filterHeight * filterWidth,
kStride,
colData,
N,
beta_,
- outputData + g * outputOffset + colWidthStart,
+ outputData + oh * outputWidth,
nStride);
}
beta_ = 1.0;
@@ -266,17 +272,18 @@ public:
N,
K,
1.0f,
- filterData + g * filterOffset,
+ filterData,
K,
- inputData + g * inputOffset,
+ inputData,
N,
beta,
- outputData + g * outputOffset,
+ outputData,
N);
}
+ inputData += inputOffset;
+ outputData += outputOffset;
+ filterData += filterOffset;
}
- inputData += inputChannels * inputHeight * inputWidth;
- outputData += outputChannels * outputHeight * outputWidth;
}
memory_.reset();
diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h
index 36a9bcf84e4b14965c83627821b71d1c7c0da1b2..915119e291caaa223249cf8e37078723621517b0 100644
--- a/paddle/function/Im2Col.h
+++ b/paddle/function/Im2Col.h
@@ -111,39 +111,42 @@ public:
int paddingWidth,
int dilationHeight,
int dilationWidth,
- int colHeightStart,
- int colHeightSize,
- int colWidthStart,
- int colWidthSize) {
+ int inputChannels,
+ int colOffset,
+ int colOutputHeight,
+ int colWidth) {
int inputHeight = imShape[1];
int inputWidth = imShape[2];
int filterHeight = colShape[1];
int filterWidth = colShape[2];
int outputWidth = colShape[4];
- for (int colh = 0; colh < colHeightSize; colh++) {
- int wOffset = (colHeightStart + colh) % filterWidth;
- int hOffset = ((colHeightStart + colh) / filterWidth) % filterHeight;
- int c_im = (colHeightStart + colh) / filterWidth / filterHeight;
-
- for (int colw = 0; colw < colWidthSize; colw++) {
- int h = (colWidthStart + colw) / outputWidth;
- int w = (colWidthStart + colw) % outputWidth;
-
- int imRowIdx = h * strideHeight + hOffset * dilationHeight;
- int imColIdx = w * strideWidth + wOffset * dilationWidth;
- if ((imRowIdx - paddingHeight) < 0 ||
- (imRowIdx - paddingHeight) >= inputHeight ||
- (imColIdx - paddingWidth) < 0 ||
- (imColIdx - paddingWidth) >= inputWidth) {
-            colData[colh * colWidthSize + colw] = static_cast<T>(0);
- } else {
- imRowIdx += c_im * inputHeight - paddingHeight;
- imColIdx -= paddingWidth;
- colData[colh * colWidthSize + colw] =
- imData[imRowIdx * inputWidth + imColIdx];
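+        // colData layout: one row of colWidth entries per (channel, fh, fw)
+        // tuple; each output row oh fills outputWidth entries of that row.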
+ for (int ic = 0; ic < inputChannels; ic++) {
+ for (int oh = 0; oh < colOutputHeight; oh++) {
+ T* dstData = colData + oh * outputWidth;
+ for (int fh = 0; fh < filterHeight; fh++) {
+ for (int fw = 0; fw < filterWidth; fw++) {
+ int imRowIdx = (oh + colOffset) * strideHeight +
+ fh * dilationHeight - paddingHeight;
+ if (imRowIdx < 0 || imRowIdx >= inputHeight) {
+ memset(dstData, 0, outputWidth * sizeof(T));
+ } else {
+ for (int ow = 0; ow < outputWidth; ow++) {
+ int imColIdx =
+ ow * strideWidth + fw * dilationWidth - paddingWidth;
+ if (imColIdx < 0 || imColIdx >= inputWidth) {
+ dstData[ow] = T(0);
+ } else {
+ dstData[ow] = imData[imRowIdx * inputWidth + imColIdx];
+ }
+ }
+ }
+ dstData += colWidth;
+ }
}
}
+ colData += filterHeight * filterWidth * colWidth;
+ imData += inputHeight * inputWidth;
}
}
};
diff --git a/paddle/function/Im2ColTest.cpp b/paddle/function/Im2ColTest.cpp
index 3ba866dcdd845403d52f7a85adfef08cbb11c305..fe44a8bf79005efb87c56f6a79f46421129bab22 100644
--- a/paddle/function/Im2ColTest.cpp
+++ b/paddle/function/Im2ColTest.cpp
@@ -202,10 +202,10 @@ void TestIm2ColMobileFunctor() {
padding,
dilation,
dilation,
+ channels,
0,
- height,
- 0,
- width);
+ outputHeight,
+ outputHeight * outputWidth);
autotest::TensorCheckEqual(*output1, *output2);
}
diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt
index ae4d3fd2f58daf87a650428e04722581610ed780..2289ddc139cbddfbaa5238e683b2f8e784a7291e 100644
--- a/paddle/inference/CMakeLists.txt
+++ b/paddle/inference/CMakeLists.txt
@@ -1,14 +1,14 @@
-set(FLUID_CORE_MODULES proto_desc paddle_memory executor prune init)
+set(FLUID_CORE_MODULES proto_desc paddle_memory lod_tensor executor prune init)
cc_library(paddle_fluid_api
- SRCS inference.cc
+ SRCS io.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
# Merge all modules into a single static library
cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
# Create shared library
-add_library(paddle_fluid_shared SHARED inference.cc)
+add_library(paddle_fluid_shared SHARED io.cc)
target_circle_link_libraries(paddle_fluid_shared
ARCHIVE_START
@@ -20,23 +20,10 @@ SET_TARGET_PROPERTIES(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
# install library & headers
if(NOT WITH_C_API AND WITH_FLUID)
- install(FILES inference.h DESTINATION include/paddle/inference)
+ install(FILES io.h DESTINATION include/paddle/inference)
install(TARGETS paddle_fluid_shared DESTINATION lib)
endif()
-add_executable(example example.cc)
-if(APPLE)
- set(OPTIONAL_LINK_FLAGS)
- if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
- set(OPTIONAL_LINK_FLAGS "-undefined dynamic_lookup")
- endif()
- target_link_libraries(example
- -Wl,-force_load paddle_fluid
- ${OPTIONAL_LINK_FLAGS}
- ${PTOOLS_LIB})
-else()
- target_link_libraries(example
- -Wl,--start-group -Wl,--whole-archive paddle_fluid
- -Wl,--no-whole-archive -Wl,--end-group
- ${PTOOLS_LIB})
+if(WITH_TESTING)
+ add_subdirectory(tests/book)
endif()
diff --git a/paddle/inference/example.cc b/paddle/inference/example.cc
deleted file mode 100644
index 0c18b45624dedcb5839d4b771e044b4a7b32af52..0000000000000000000000000000000000000000
--- a/paddle/inference/example.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <time.h>
-#include <iostream>
-#include "gflags/gflags.h"
-#include "paddle/inference/inference.h"
-
-DEFINE_string(dirname, "", "Directory of the inference model.");
-
-int main(int argc, char** argv) {
- google::ParseCommandLineFlags(&argc, &argv, true);
- if (FLAGS_dirname.empty()) {
- // Example:
- // ./example --dirname=recognize_digits_mlp.inference.model
- std::cout << "Usage: ./example --dirname=path/to/your/model" << std::endl;
- exit(1);
- }
-
- std::cout << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
- std::string dirname = FLAGS_dirname;
-
- paddle::InferenceEngine* engine = new paddle::InferenceEngine();
- engine->LoadInferenceModel(dirname);
-
- paddle::framework::LoDTensor input;
- srand(time(0));
- float* input_ptr =
-  float* input_ptr =
-      input.mutable_data<float>({1, 784}, paddle::platform::CPUPlace());
- input_ptr[i] = rand() / (static_cast(RAND_MAX));
- }
-
-  std::vector<paddle::framework::LoDTensor> feeds;
-  feeds.push_back(input);
-  std::vector<paddle::framework::LoDTensor> fetchs;
- engine->Execute(feeds, fetchs);
-
- for (size_t i = 0; i < fetchs.size(); ++i) {
- auto dims_i = fetchs[i].dims();
- std::cout << "dims_i:";
- for (int j = 0; j < dims_i.size(); ++j) {
- std::cout << " " << dims_i[j];
- }
- std::cout << std::endl;
- std::cout << "result:";
-  float* output_ptr = fetchs[i].data<float>();
- for (int j = 0; j < paddle::framework::product(dims_i); ++j) {
- std::cout << " " << output_ptr[j];
- }
- std::cout << std::endl;
- }
-
- delete engine;
- return 0;
-}
diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc
deleted file mode 100644
index b43c359ed1787143403336e8c1cb4c7f85b1d7a2..0000000000000000000000000000000000000000
--- a/paddle/inference/inference.cc
+++ /dev/null
@@ -1,187 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "inference.h"
-#include <fstream>
-#include "paddle/framework/executor.h"
-#include "paddle/framework/init.h"
-#include "paddle/framework/scope.h"
-
-namespace paddle {
-
-void InferenceEngine::LoadInferenceModel(const std::string& dirname) {
- std::string model_filename = dirname + "/__model__";
- LOG(INFO) << "loading model from " << model_filename;
- std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
- std::string program_desc_str;
- inputfs.seekg(0, std::ios::end);
- program_desc_str.resize(inputfs.tellg());
- inputfs.seekg(0, std::ios::beg);
- LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
- inputfs.read(&program_desc_str[0], program_desc_str.size());
- inputfs.close();
-
- program_ = new framework::ProgramDesc(program_desc_str);
- GenerateLoadProgram(dirname);
-
- framework::BlockDesc* global_block = program_->MutableBlock(0);
- feed_var_names_.clear();
- fetch_var_names_.clear();
- for (auto* op : global_block->AllOps()) {
- if (op->Type() == "feed") {
- feed_var_names_.insert(feed_var_names_.begin(), op->Output("Out")[0]);
- } else if (op->Type() == "fetch") {
- fetch_var_names_.push_back(op->Input("X")[0]);
- }
- }
-}
-
-bool InferenceEngine::IsParameter(const framework::VarDesc* var) {
- if (var->Persistable()) {
- // There are many unreachable variables in the program
- for (size_t i = 0; i < program_->Size(); ++i) {
- const framework::BlockDesc& block = program_->Block(i);
- for (auto* op : block.AllOps()) {
- if (op->Type() == "feed") {
- continue;
- }
- for (auto input_argument_name : op->InputArgumentNames()) {
- if (input_argument_name == var->Name()) {
- return true;
- }
- }
- }
- }
- }
- return false;
-}
-
-void InferenceEngine::GenerateLoadProgram(const std::string& dirname) {
- framework::BlockDesc* global_block = program_->MutableBlock(0);
-
- load_program_ = new framework::ProgramDesc();
- framework::BlockDesc* load_block = load_program_->MutableBlock(0);
- for (auto* var : global_block->AllVars()) {
- if (IsParameter(var)) {
- LOG(INFO) << "parameter's name: " << var->Name();
-
- framework::VarDesc* new_var = load_block->Var(var->Name());
- new_var->SetShape(var->Shape());
- new_var->SetDataType(var->GetDataType());
- new_var->SetType(var->GetType());
- new_var->SetLoDLevel(var->GetLoDLevel());
- new_var->SetPersistable(true);
-
- // append_op
- framework::OpDesc* op = load_block->AppendOp();
- op->SetType("load");
- op->SetOutput("Out", {new_var->Name()});
- op->SetAttr("file_path", {dirname + "/" + new_var->Name()});
- op->CheckAttrs();
- }
- }
-}
-
-void InferenceEngine::PrependFeedOp() {
- if (!program_) {
- LOG(FATAL) << "Please initialize the program_ first.";
- }
-
- framework::BlockDesc* global_block = program_->MutableBlock(0);
-
- // create_var
- framework::VarDesc* feed_var = global_block->Var("feed");
- feed_var->SetType(framework::proto::VarDesc::FEED_MINIBATCH);
- feed_var->SetPersistable(true);
-
- // prepend feed_op
- for (size_t i = 0; i < feed_var_names_.size(); ++i) {
- std::string var_name = feed_var_names_[i];
- LOG(INFO) << "feed var's name: " << var_name;
-
- // prepend_op
- framework::OpDesc* op = global_block->PrependOp();
- op->SetType("feed");
- op->SetInput("X", {"feed"});
- op->SetOutput("Out", {var_name});
-    op->SetAttr("col", {static_cast<int>(i)});
- op->CheckAttrs();
- }
-}
-
-void InferenceEngine::AppendFetchOp() {
- if (!program_) {
- LOG(FATAL) << "Please initialize the program_ first.";
- }
-
- framework::BlockDesc* global_block = program_->MutableBlock(0);
-
- // create_var
- framework::VarDesc* fetch_var = global_block->Var("fetch");
- fetch_var->SetType(framework::proto::VarDesc::FETCH_LIST);
- fetch_var->SetPersistable(true);
-
- // append fetch_op
- for (size_t i = 0; i < fetch_var_names_.size(); ++i) {
- std::string var_name = fetch_var_names_[i];
- LOG(INFO) << "fetch var's name: " << var_name;
-
- // append_op
- framework::OpDesc* op = global_block->AppendOp();
- op->SetType("fetch");
- op->SetInput("X", {var_name});
- op->SetOutput("Out", {"fetch"});
-    op->SetAttr("col", {static_cast<int>(i)});
- op->CheckAttrs();
- }
-}
-
-void InferenceEngine::Execute(const std::vector<framework::LoDTensor>& feeds,
-                              std::vector<framework::LoDTensor>& fetchs) {
- if (!program_ || !load_program_) {
- LOG(FATAL) << "Please initialize the program_ and load_program_ first.";
- }
-
- if (feeds.size() != feed_var_names_.size()) {
- LOG(FATAL) << "Please feed " << feed_var_names_.size() << " input Tensors.";
- }
-
- auto* place = new platform::CPUPlace();
- framework::InitDevices();
- framework::Executor* executor = new framework::Executor(*place);
- framework::Scope* scope = new framework::Scope();
-
- executor->Run(*load_program_, scope, 0, true, true);
-
-  std::map<std::string, const framework::LoDTensor*> feed_targets;
-  std::map<std::string, framework::LoDTensor*> fetch_targets;
-
- // set_feed_variable
- for (size_t i = 0; i < feed_var_names_.size(); ++i) {
- feed_targets[feed_var_names_[i]] = &feeds[i];
- }
-
- // get_fetch_variable
- fetchs.resize(fetch_var_names_.size());
- for (size_t i = 0; i < fetch_var_names_.size(); ++i) {
- fetch_targets[fetch_var_names_[i]] = &fetchs[i];
- }
-
- executor->Run(*program_, scope, feed_targets, fetch_targets);
-
- delete place;
- delete scope;
- delete executor;
-}
-} // namespace paddle
diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h
deleted file mode 100644
index 26f259824b945e260b370ced9d065842264075d5..0000000000000000000000000000000000000000
--- a/paddle/inference/inference.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/framework/block_desc.h"
-#include "paddle/framework/lod_tensor.h"
-#include "paddle/framework/program_desc.h"
-
-namespace paddle {
-
-class InferenceEngine {
-public:
- InferenceEngine() : program_(nullptr), load_program_(nullptr) {}
- ~InferenceEngine() {
- delete program_;
- delete load_program_;
- }
-
- void LoadInferenceModel(const std::string& dirname);
- void Execute(const std::vector<framework::LoDTensor>& feeds,
-              std::vector<framework::LoDTensor>& fetchs);
-
-private:
- bool IsParameter(const framework::VarDesc* var);
- void GenerateLoadProgram(const std::string& dirname);
- void PrependFeedOp();
- void AppendFetchOp();
-
-private:
- framework::ProgramDesc* program_;
- framework::ProgramDesc* load_program_;
- std::vector<std::string> feed_var_names_;
- std::vector<std::string> fetch_var_names_;
-};
-
-} // namespace paddle
diff --git a/paddle/inference/io.cc b/paddle/inference/io.cc
new file mode 100644
index 0000000000000000000000000000000000000000..60ad7af1c0a469beb6a07bf057a8647fcb98cca8
--- /dev/null
+++ b/paddle/inference/io.cc
@@ -0,0 +1,98 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/inference/io.h"
+
+#include <fstream>
+#include "paddle/framework/block_desc.h"
+#include "paddle/framework/feed_fetch_type.h"
+
+namespace paddle {
+namespace inference {
+
+bool IsParameter(const framework::VarDesc* var,
+ const framework::ProgramDesc& main_program) {
+ if (var->Persistable()) {
+ // There are many unreachable variables in the program
+ for (size_t i = 0; i < main_program.Size(); ++i) {
+ const framework::BlockDesc& block = main_program.Block(i);
+ for (auto* op : block.AllOps()) {
+ if (op->Type() == framework::kFeedOpType) {
+ continue;
+ }
+ for (auto input_argument_name : op->InputArgumentNames()) {
+ if (input_argument_name == var->Name()) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+void LoadPersistables(framework::Executor& executor,
+ framework::Scope& scope,
+ const std::string& dirname,
+ const framework::ProgramDesc& main_program) {
+ const framework::BlockDesc& global_block = main_program.Block(0);
+
+ framework::ProgramDesc* load_program = new framework::ProgramDesc();
+ framework::BlockDesc* load_block = load_program->MutableBlock(0);
+ for (auto* var : global_block.AllVars()) {
+ if (IsParameter(var, main_program)) {
+ VLOG(3) << "parameter's name: " << var->Name();
+
+ framework::VarDesc* new_var = load_block->Var(var->Name());
+ new_var->SetShape(var->Shape());
+ new_var->SetDataType(var->GetDataType());
+ new_var->SetType(var->GetType());
+ new_var->SetLoDLevel(var->GetLoDLevel());
+ new_var->SetPersistable(true);
+
+ // append_op
+ framework::OpDesc* op = load_block->AppendOp();
+ op->SetType("load");
+ op->SetOutput("Out", {new_var->Name()});
+ op->SetAttr("file_path", {dirname + "/" + new_var->Name()});
+ op->CheckAttrs();
+ }
+ }
+ executor.Run(*load_program, &scope, 0, true, true);
+ delete load_program;
+}
+
+std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
+ framework::Scope& scope,
+ const std::string& dirname) {
+ std::string model_filename = dirname + "/__model__";
+ LOG(INFO) << "loading model from " << model_filename;
+ std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
+ std::string program_desc_str;
+ inputfs.seekg(0, std::ios::end);
+ program_desc_str.resize(inputfs.tellg());
+ inputfs.seekg(0, std::ios::beg);
+ LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
+ inputfs.read(&program_desc_str[0], program_desc_str.size());
+ inputfs.close();
+
+  std::unique_ptr<framework::ProgramDesc> main_program(
+ new framework::ProgramDesc(program_desc_str));
+
+ LoadPersistables(executor, scope, dirname, *main_program);
+ return main_program;
+}
+
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/inference/io.h b/paddle/inference/io.h
new file mode 100644
index 0000000000000000000000000000000000000000..962b6c4e20d30de3cc28eae1c8c5c33b3ab5f6ac
--- /dev/null
+++ b/paddle/inference/io.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle/framework/executor.h"
+#include "paddle/framework/program_desc.h"
+#include "paddle/framework/scope.h"
+
+namespace paddle {
+namespace inference {
+
+void LoadPersistables(framework::Executor& executor,
+ framework::Scope& scope,
+ const std::string& dirname,
+ const framework::ProgramDesc& main_program);
+
+std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
+ framework::Scope& scope,
+ const std::string& dirname);
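+
+// Typical usage (sketch only; the feed/fetch maps come from the caller):
+//   framework::Executor executor(platform::CPUPlace());
+//   framework::Scope scope;
+//   auto program = inference::Load(executor, scope, "/path/to/model_dir");
+//   executor.Run(*program, &scope, feed_targets, fetch_targets);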
+
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/inference/tests/book/CMakeLists.txt b/paddle/inference/tests/book/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d3798fb8fd8769aef5940d4ce724cb0cc8686422
--- /dev/null
+++ b/paddle/inference/tests/book/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(PYTHON_TESTS_DIR ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/tests)
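+# paddle_fluid is linked whole-archive (ARCHIVE_START/ARCHIVE_END) so that
+# statically registered operators are not stripped by the linker.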
+cc_test(test_inference_recognize_digits_mlp
+ SRCS test_inference_recognize_digits.cc
+ DEPS ARCHIVE_START paddle_fluid ARCHIVE_END
+ ARGS --dirname=${PYTHON_TESTS_DIR}/book/recognize_digits_mlp.inference.model)
+set_tests_properties(test_inference_recognize_digits_mlp
+ PROPERTIES DEPENDS test_recognize_digits_mlp_cpu)
diff --git a/paddle/inference/tests/book/test_inference_recognize_digits.cc b/paddle/inference/tests/book/test_inference_recognize_digits.cc
new file mode 100644
index 0000000000000000000000000000000000000000..26dc2aee04261d9a1fd29b4d75bfacc7870c09d8
--- /dev/null
+++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc
@@ -0,0 +1,113 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <time.h>
+#include <sstream>
+#include "gflags/gflags.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/inference/io.h"
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+template <typename Place>
+void TestInference(const std::string& dirname,
+                   const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
+                   std::vector<paddle::framework::LoDTensor*>& cpu_fetchs) {
+ // 1. Define place, executor and scope
+ auto place = Place();
+ auto executor = paddle::framework::Executor(place);
+ auto* scope = new paddle::framework::Scope();
+
+ // 2. Initialize the inference_program and load all parameters from file
+ auto inference_program = paddle::inference::Load(executor, *scope, dirname);
+
+ // 3. Get the feed_target_names and fetch_target_names
+  const std::vector<std::string>& feed_target_names =
+      inference_program->GetFeedTargetNames();
+  const std::vector<std::string>& fetch_target_names =
+      inference_program->GetFetchTargetNames();
+
+ // 4. Prepare inputs: set up maps for feed targets
+  std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
+ for (size_t i = 0; i < feed_target_names.size(); ++i) {
+    // Make sure cpu_feeds[i] corresponds to feed_target_names[i]
+ feed_targets[feed_target_names[i]] = cpu_feeds[i];
+ }
+
+ // 5. Define Tensor to get the outputs: set up maps for fetch targets
+  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
+ for (size_t i = 0; i < fetch_target_names.size(); ++i) {
+ fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
+ }
+
+ // 6. Run the inference program
+ executor.Run(*inference_program, scope, feed_targets, fetch_targets);
+
+ delete scope;
+}
+
+TEST(inference, recognize_digits) {
+ if (FLAGS_dirname.empty()) {
+    LOG(FATAL) << "Usage: ./test_inference_recognize_digits_mlp "
+                  "--dirname=path/to/your/model";
+ }
+
+ LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
+ std::string dirname = FLAGS_dirname;
+
+ // 0. Call `paddle::framework::InitDevices()` initialize all the devices
+ // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
+
+ paddle::framework::LoDTensor input;
+ srand(time(0));
+  float* input_ptr =
+      input.mutable_data<float>({1, 28, 28}, paddle::platform::CPUPlace());
+ for (int i = 0; i < 784; ++i) {
+ input_ptr[i] = rand() / (static_cast(RAND_MAX));
+ }
+  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
+ cpu_feeds.push_back(&input);
+
+ paddle::framework::LoDTensor output1;
+  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+ cpu_fetchs1.push_back(&output1);
+
+ // Run inference on CPU
+  TestInference<paddle::platform::CPUPlace>(
+      dirname, cpu_feeds, cpu_fetchs1);
+ LOG(INFO) << output1.dims();
+
+#ifdef PADDLE_WITH_CUDA
+ paddle::framework::LoDTensor output2;
+  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+ cpu_fetchs2.push_back(&output2);
+
+ // Run inference on CUDA GPU
+  TestInference<paddle::platform::CUDAPlace>(
+      dirname, cpu_feeds, cpu_fetchs2);
+ LOG(INFO) << output2.dims();
+
+ EXPECT_EQ(output1.dims(), output2.dims());
+ EXPECT_EQ(output1.numel(), output2.numel());
+
+ float err = 1E-3;
+ int count = 0;
+ for (int64_t i = 0; i < output1.numel(); ++i) {
+    if (fabs(output1.data<float>()[i] - output2.data<float>()[i]) > err) {
+ count++;
+ }
+ }
+ EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
+#endif
+}
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 1ec4336cabbc7d3073b7638b7484bf61e83a2dc5..cc86b12be08ba987f9682ebf3fda56c2f07fb576 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -2015,13 +2015,6 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
CHECK_EQ(channels * outLength, maskMatP->getWidth());
}
- /* initialize the data_ */
- for (size_t i = 0; i < height_; i++) {
- for (size_t j = 0; j < width_; j++) {
- outData[i * outStride + j] = -(real)FLT_MAX;
- }
- }
-
/* pool max one by one */
for (size_t n = 0; n < num; ++n) { // frame by frame
if (!isContiguous()) {
@@ -2030,19 +2023,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
- int hend = std::min(hstart + sizeY, imgSizeH);
- hstart = std::max(hstart, 0);
+ int hend = hstart + sizeY;
+ hstart = hstart < 0 ? 0 : hstart;
+ hend = hend < (int)imgSizeH ? hend : (int)imgSizeH;
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
- int wend = std::min(wstart + sizeX, imgSizeW);
- wstart = std::max(wstart, 0);
+ int wend = wstart + sizeX;
+ wstart = wstart < 0 ? 0 : wstart;
+ wend = wend < (int)imgSizeW ? wend : (int)imgSizeW;
if (maskData == NULL) {
+ real tmp = -(real)FLT_MAX;
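+          // Track the window maximum in a local variable instead of
+          // re-reading outData for every element.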
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
- outData[ph * outputW + pw] = std::max(
- outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
+ tmp = tmp < inputData[h * imgSizeW + w]
+ ? inputData[h * imgSizeW + w]
+ : tmp;
}
}
+ outData[ph * outputW + pw] = tmp;
} else {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 48cf5816cce4bb5ee8e66e72c5b1acea8535ab10..e903f43ba69ee9e28b3a03e8921a41ffa81a2542 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -122,9 +122,11 @@ if(WITH_DISTRIBUTE)
set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
op_library(recv_op DEPS ${DISTRIBUTE_DEPS})
set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
- cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor)
+ op_library(listen_and_serv_op DEPS ${DISTRIBUTE_DEPS})
+ set_source_files_properties(listen_and_serv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+ cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op listen_and_serv_op sum_op executor)
else()
- set(DEPS_OPS ${DEPS_OPS} send_op recv_op)
+ set(DEPS_OPS ${DEPS_OPS} send_op recv_op listen_and_serv_op)
endif()
op_library(cond_op DEPS framework_proto tensor net_op)
@@ -173,6 +175,8 @@ endif()
# FIXME(typhoonzero): save/load depends lodtensor serialization functions
op_library(save_op DEPS lod_tensor)
op_library(load_op DEPS lod_tensor)
+op_library(save_combine_op DEPS lod_tensor)
+op_library(load_combine_op DEPS lod_tensor)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})
@@ -192,3 +196,4 @@ if(WITH_GPU)
cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
endif()
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
+cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op)
diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu
index 4e579387924a5b0499f29609bc6b1322030a3c0d..00cb6e9cafb4e79ed3d59cd4a6e40ea132e5efda 100644
--- a/paddle/operators/adagrad_op.cu
+++ b/paddle/operators/adagrad_op.cu
@@ -82,7 +82,7 @@ struct SparseAdagradFunctor {
math::scatter::MergeAdd<platform::CUDADeviceContext, T> merge_func;
auto grad_merge = merge_func(context, grad);
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
- auto& merge_rows = grad_merge.rows();
+    framework::Vector<int64_t> merge_rows(grad_merge.rows());
// 2. m += g_m * g_m
math::scatter::Mul<platform::CUDADeviceContext, T> sqare_func;
auto grad_square = sqare_func(context, grad_merge, grad_merge);
@@ -101,8 +101,8 @@ struct SparseAdagradFunctor {
SparseAdagradFunctorKernel<
        T, 256><<<grid2, threads, 0,
                  reinterpret_cast<const platform::CUDADeviceContext&>(context)
- .stream()>>>(grad_merge_data, grad_merge.rows().data(),
- lr, param_data, moment_data, grad_width,
+ .stream()>>>(grad_merge_data, merge_rows.cuda_data(), lr,
+ param_data, moment_data, grad_width,
epsilon);
}
};
diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h
index 9cc34bdded780e61e8700eb4fa4a295c84fb48bc..bf536687d398b8342e6ae76a07c11e5fe47483e0 100644
--- a/paddle/operators/adam_op.h
+++ b/paddle/operators/adam_op.h
@@ -199,7 +199,12 @@ class AdamOpKernel : public framework::OpKernel {
merge_func(ctx.template device_context<DeviceContext>(), grad);
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
- auto* rows = grad_merge.rows().data();
+ int64_t* rows = nullptr;
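+    // On CUDA, hand the functor the device copy of the selected rows;
+    // on CPU, use the host data directly.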
+ if (platform::is_gpu_place(ctx.GetPlace())) {
+ rows = grad_merge.mutable_rows()->cuda_data();
+ } else {
+ rows = grad_merge.mutable_rows()->data();
+ }
auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
SparseAdamFunctor<T> functor(
diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu
index 45635f16745346b08f7e31db2f25905bdbc3aeeb..2a970cd9fa965b4126356eaa1519068f9c7a7f34 100644
--- a/paddle/operators/ctc_align_op.cu
+++ b/paddle/operators/ctc_align_op.cu
@@ -69,12 +69,11 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel {
auto stream = ctx.cuda_device_context().stream();
MergeAndDelCudaKernel<T><<<1, 1, 0, stream>>>(
- num_tokens, tokens, num_seq, input_lod[level].data(), blank,
+ num_tokens, tokens, num_seq, input_lod[level].cuda_data(), blank,
merge_repeated, dev_out_lod0_ptr, output_data);
// set output lod
-    thrust::host_vector<size_t> host_out_lod0(dev_out_lod0.begin(),
-                                              dev_out_lod0.end());
+    std::vector<size_t> host_out_lod0(dev_out_lod0.begin(), dev_out_lod0.end());
framework::LoD out_lod;
out_lod.push_back(host_out_lod0);
output->set_lod(out_lod);
diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc
index 35cb18797ff66cb87a6658e73ce02b0bfae29baa..5274aa204e6629c9c5ea850c433e0948c89015bd 100644
--- a/paddle/operators/dropout_op.cc
+++ b/paddle/operators/dropout_op.cc
@@ -51,6 +51,13 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
"'dropout_prob' must be between 0.0 and 1.0.");
});
AddAttr("is_test", "True if in test phase.").SetDefault(false);
+ AddAttr("fix_seed",
+ "A flag indicating whether to use a fixed seed to generate "
+ "random mask. NOTE: DO NOT set this flag to true in "
+ "training. Setting this flag to true is only useful in "
+ "unittest or for debug that always the same output units "
+ "will be dropped.")
+ .SetDefault(false);
AddAttr("seed", "Dropout random seed.").SetDefault(0);
AddComment(R"DOC(
diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu
index c56930336e865079f1b96df0f35b0a051fe63a27..84d78445a4fa340ba3c066bb48b96b2a890db652 100644
--- a/paddle/operators/dropout_op.cu
+++ b/paddle/operators/dropout_op.cu
@@ -62,7 +62,11 @@ class GPUDropoutKernel : public framework::OpKernel {
    auto* mask = context.Output<Tensor>("Mask");
    auto* mask_data = mask->mutable_data<T>(context.GetPlace());
    int size = framework::product(mask->dims());
-    int seed = context.Attr<int>("seed");
+
+ std::random_device rnd;
+    int seed =
+        context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+
thrust::counting_iterator index_sequence_begin(0);
thrust::transform(index_sequence_begin, index_sequence_begin + size,
                      thrust::device_ptr<T>(mask_data),
diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h
index c90b8d277eb78048c001d36a367287146b51c636..46e5dbc64ff9ad3d04a9c1c07f4226932f661baf 100644
--- a/paddle/operators/dropout_op.h
+++ b/paddle/operators/dropout_op.h
@@ -38,9 +38,15 @@ class CPUDropoutKernel : public framework::OpKernel {
    if (!context.Attr<bool>("is_test")) {
      auto* mask = context.Output<Tensor>("Mask");
      auto* mask_data = mask->mutable_data<T>(context.GetPlace());
-      int seed = context.Attr<int>("seed");
+
+      // NOTE: a fixed seed should only be used in unit tests or for debugging;
+      // training must use a truly random seed.
+ std::random_device rnd;
std::minstd_rand engine;
+      int seed =
+          context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
engine.seed(seed);
+
      std::uniform_real_distribution<float> dist(0, 1);
size_t size = framework::product(mask->dims());
for (size_t i = 0; i < size; ++i) {
diff --git a/paddle/operators/elementwise_pow_op.cc b/paddle/operators/elementwise_pow_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5293cc7dd34ccee860c50e964516da9b4d42d29c
--- /dev/null
+++ b/paddle/operators/elementwise_pow_op.cc
@@ -0,0 +1,37 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/elementwise_pow_op.h"
+#include "paddle/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+class ElementwisePowOpMaker : public ElementwiseOpMaker {
+ public:
+ ElementwisePowOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+ : ElementwiseOpMaker(proto, op_checker) {
+ SetComment("Pow", "Out = X ^ Y");
+ AddComment(comment_);
+ }
+};
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(elementwise_pow, ops::ElementwiseOp,
+ ops::ElementwisePowOpMaker);
+REGISTER_OP_CPU_KERNEL(
+ elementwise_pow,
+    ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/operators/elementwise_pow_op.cu b/paddle/operators/elementwise_pow_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..643c978e635bc8e9671b47774c2eac5b713f59c2
--- /dev/null
+++ b/paddle/operators/elementwise_pow_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/elementwise_pow_op.h"
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_CUDA_KERNEL(
+ elementwise_pow,
+    ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/operators/elementwise_pow_op.h b/paddle/operators/elementwise_pow_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..6019e709e0db0fd62b4d3350bb768095f87ef241
--- /dev/null
+++ b/paddle/operators/elementwise_pow_op.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cmath>
+#include "paddle/operators/elementwise_op_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+struct PowFunctor {
+ inline HOSTDEVICE T operator()(T a, T b) const { return std::pow(a, b); }
+};
+
+template <typename DeviceContext, typename T>
+class ElementwisePowKernel : public framework::OpKernel<T> {
+ public:
+ void Compute(const framework::ExecutionContext& ctx) const override {
+    ElementwiseComputeEx<PowFunctor<T>, DeviceContext, T>(ctx);
+ }
+};
+
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc
index d738e1850ca4f658f4fca5c9bf643c44f676cce9..789d01e0022b5c36957f295265a9dc42649b310f 100644
--- a/paddle/operators/feed_op.cc
+++ b/paddle/operators/feed_op.cc
@@ -52,7 +52,11 @@ class FeedOp : public framework::OperatorBase {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
- framework::Copy(feed_item, place, dev_ctx, out_item);
+ if (platform::is_same_place(feed_item.place(), place)) {
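+      // Already on the target place: share the buffer instead of copying.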
+ out_item->ShareDataWith(feed_item);
+ } else {
+ framework::Copy(feed_item, place, dev_ctx, out_item);
+ }
out_item->set_lod(feed_item.lod());
}
};
diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h
index b1957fb9ce6add8628cb206abf2c569d3f615c85..a08bd4233b02d021aaa64bafe4b855f11a60d338 100644
--- a/paddle/operators/gru_op.h
+++ b/paddle/operators/gru_op.h
@@ -30,11 +30,12 @@ using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
- const framework::Tensor& src, const size_t* index,
+ const framework::Tensor& src,
+                             framework::Vector<size_t> index_lod,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor