diff --git a/RELEASE.md b/RELEASE.md
index f1588cb15bb8d1f79732d6b5195775cb13acef54..146f7afa7dfbc152500b82fde28445ae3155c16c 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,11 +1,103 @@
 # Release v0.10.0

+We are glad to announce version 0.10.0. In this version, we are happy to
+release the new
+[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/).
+
+- Our old Python API is out of date. It is hard to learn and hard to use. To
+  write a PaddlePaddle program using the old API, we had to write at least two
+  Python files: one `data provider` and another one that defines the network
+  topology. Users start a PaddlePaddle job by running the `paddle_trainer` C++
+  program, which calls the Python interpreter to run the network topology
+  configuration script and then starts the training loop, which iteratively
+  calls the data provider function to load minibatches. This prevents us from
+  writing a Python program in a modern way, e.g., in a Jupyter Notebook.
+
+- The new API, which we often refer to as the *v2 API*, allows us to write
+  much shorter Python programs that define the network and the data in a
+  single .py file. Such a program can also run in a Jupyter Notebook, since
+  the entry point is a Python program and PaddlePaddle runs as a shared
+  library loaded and invoked by this Python program (see the sketch further
+  below).
+
+Based on the new API, we delivered an online interactive
+book, [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
+and [its Chinese version](http://book.paddlepaddle.org/).
+
+We also worked on updating our online documentation to describe the new API.
+This is ongoing work; we will release more documentation improvements in the
+next version.
+
+We also worked on bringing the new API to distributed model training (via MPI
+and Kubernetes). This work is ongoing; we will release more about it in the
+next version.
+
 ## New Features
+* Release the new [Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/).
+* Deep Learning 101 book in [English](http://book.paddlepaddle.org/index.en.html) and [Chinese](http://book.paddlepaddle.org/).
+* Support rectangular input for CNNs.
+* Support stride pooling for seqlastin and seqfirstin.
+* Expose `seq_concat_layer/seq_reshape_layer` in `trainer_config_helpers`.
+* Add the dataset package: CIFAR, MNIST, IMDB, WMT14, CONLL05, movielens, imikolov.
+* Add a Priorbox layer for Single Shot Multibox Detection.
+* Add the smooth L1 cost.
+* Add a data reader creator and data reader decorators for the v2 API.
+* Add the CPU implementation of the cmrnorm projection.
+
 ## Improvements
+* Support Python virtualenv for `paddle_trainer`.
+* Add pre-commit hooks to automatically format our code.
+* Upgrade protobuf to version 3.x.
+* Add an option to check data types in the Python data provider.
+* Speed up the backward pass of the average layer on GPU.
+* Documentation refinement.
+* Check for dead links in documents using Travis-CI.
+* Add an example explaining `sparse_vector`.
+* Simplify the data processing flow for Quick Start.
+* Support CUDNN Deconv.
+* Add a data feeder to the v2 API.
+* Support predicting samples from `sys.stdin` in the sentiment demo.
+* Provide a multi-process interface for image preprocessing.
+* Add a benchmark document for the v1 API.
+* Add ReLU in `layer_math.py`.
+* Add packages for automatically downloading public datasets.
+* Rename `Argument::sumCost` to `Argument::sum`, since class `Argument` has nothing to do with cost.
+* Expose `Argument::sum` to Python.
+* Add a new `TensorExpression` implementation for matrix-related expression evaluations.
+* Add lazy assignment for optimizing the calculation of a batch of multiple expressions.
+* Add the abstract class `Function` and its implementations:
+  * `PadFunc` and `PadGradFunc`.
+  * `ContextProjectionForwardFunc` and `ContextProjectionBackwardFunc`.
+  * `CosSimBackward` and `CosSimBackwardFunc`.
+  * `CrossMapNormalFunc` and `CrossMapNormalGradFunc`.
+  * `MulFunc`.
+* Add the classes `AutoCompare` and `FunctionCompare`, which make it easier to write unit tests comparing the GPU and CPU versions of a function.
+* Generate `libpaddle_test_main.a` and remove the main function from test files.
+* Support dense numpy vectors in PyDataProvider2.
+* Clean up the code base, removing some copy-and-pasted code snippets:
+  * Extract a `RowBuffer` class for `SparseRowMatrix`.
+  * Clean the interface of `GradientMachine`.
+  * Use the `override` keyword in layers.
+  * Simplify `Evaluator::create`, using `ClassRegister` to create `Evaluator`s.
+* Check the MD5 checksum when downloading demo datasets.
+* Add `paddle::Error`, which is intended to replace `LOG(FATAL)` in Paddle.
+
 ## Bug Fixes
+* Check layer input types for `recurrent_group`.
+* Don't run `clang-format` on .cu source files.
+* Fix bugs in `LogActivation`.
+* Fix the bug triggered when running `test_layerHelpers` multiple times.
+* Fix the bug where the seq2seq demo exceeded the protobuf message size limit.
+* Fix a bug in the data provider converter in GPU mode.
+* Fix a bug in `GatedRecurrentLayer`.
+* Fix a `BatchNorm` bug when testing more than one model.
+* Fix the broken unit test of `paramRelu`.
+* Fix some compile-time warnings about `CpuSparseMatrix`.
+* Fix a `MultiGradientMachine` error when `trainer_count > batch_size`.
+* Fix bugs that prevented asynchronous data loading in `PyDataProvider2`.

 # Release v0.9.0
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index aebb5d9fcb186005607c4849b70ecb61de771deb..0918e6cc633e7067b8bd2d5c5e1622d4139d4d14 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
   message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
   set(CBLAS_FOUND ON)
   if(${MKL_LAPACK_INC_DIR})
-    add_definitions(-DPADDLE_USE_LAPACK)
     message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
   endif()
   return() # return file.
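The release notes above describe the v2 API only in prose; the sketch referenced there follows. It is a minimal single-file illustration in the spirit of the Deep Learning 101 fit-a-line example, not code from this patch: names such as `mse_cost`, `uci_housing`, and the `SGD` trainer signature are assumptions about the v2 API of this era and may differ slightly in 0.10.0.

```python
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

# The whole job lives in one .py file: no separate data-provider script
# and no paddle_trainer invocation.
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
cost = paddle.layer.mse_cost(input=y_predict, label=y)

parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
    cost=cost,
    parameters=parameters,
    update_equation=paddle.optimizer.Momentum(momentum=0))

# shuffle() and batch() are the reader decorators listed under New Features:
# they wrap a plain Python generator into a shuffled minibatch stream.
trainer.train(
    reader=paddle.batch(
        paddle.reader.shuffle(paddle.dataset.uci_housing.train(), buf_size=500),
        batch_size=2),
    num_passes=10)
```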
@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND) message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") set(CBLAS_FOUND ON) if(ATLAS_CLAPACK_INC_DIR) - add_definitions(-DPADDLE_USE_LAPACK) set(CBLAS_INC_DIR ${CBLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR}) message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") endif() @@ -115,7 +113,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") set(CBLAS_FOUND ON) if(OPENBLAS_LAPACKE_INC_DIR) - add_definitions(-DPADDLE_USE_LAPACK) message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") endif() return() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 4ddadb9fa3b2c3993d1938fcbf1b823e66db99f2..97b6768decbf27c62af98542a5633eda1c544f29 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -24,45 +24,17 @@ IF(NOT ${CBLAS_FOUND}) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) - # check fortran compiler and library + SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_LAPACK=1 NO_SHARED=1) + IF(ANDROID) SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") - SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 NOFORTRAN=1 USE_THREAD=0 libs) + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0 libs) ELSEIF(RPI) SET(OPENBLAS_COMMIT "v0.2.19") - SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 NOFORTRAN=1 USE_THREAD=0 libs) + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0 libs) ELSE() - IF(CMAKE_COMPILER_IS_GNUCC) - ENABLE_LANGUAGE(Fortran) - if (NOT CMAKE_Fortran_COMPILER_VERSION) - # cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly. - execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion - OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION) - endif() - string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION}) - list(GET Fortran_VERSION 0 Fortran_MAJOR) - list(GET Fortran_VERSION 1 Fortran_MINOR) - find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS - /lib - /usr/lib - /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/ - /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/) - if (NOT GFORTRAN_LIBRARY) - message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas") - endif() - find_package(Threads REQUIRED) - LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT}) - ENDIF(CMAKE_COMPILER_IS_GNUCC) - - IF(NOT CMAKE_Fortran_COMPILER) - MESSAGE(FATAL_ERROR "To build lapack in libopenblas, " - "you need to set gfortran compiler: cmake .. 
-DCMAKE_Fortran_COMPILER=...") - ENDIF(NOT CMAKE_Fortran_COMPILER) - - ADD_DEFINITIONS(-DPADDLE_USE_LAPACK) - SET(OPENBLAS_COMMIT "v0.2.19") - SET(OPENBLAS_ARGS FC=${CMAKE_Fortran_COMPILER} DYNAMIC_ARCH=1 libs netlib) + SET(OPENBLAS_ARGS DYNAMIC_ARCH=1 libs) ENDIF() ExternalProject_Add( @@ -73,7 +45,7 @@ IF(NOT ${CBLAS_FOUND}) PREFIX ${CBLAS_SOURCES_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR} BUILD_IN_SOURCE 1 - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} CC=${CMAKE_C_COMPILER} NO_SHARED=1 ${OPTIONAL_ARGS} + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${COMMON_ARGS} ${OPTIONAL_ARGS} INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX= UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/package.cmake b/cmake/package.cmake index 211593f358eb34cf1a5692697247511893dfeb93..ff49a2d08e8f6004320acfce266339aa301eb9c4 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -1,5 +1,4 @@ set(CPACK_PACKAGE_NAME paddle) -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "") set(CPACK_PACKAGE_VERSION_MAJOR ${PADDLE_MAJOR_VERSION}) set(CPACK_PACKAGE_VERSION_MINOR ${PADDLE_MINOR_VERSION}) set(CPACK_PACKAGE_VERSION_PATCH ${PADDLE_PATCH_VERSION}) @@ -10,8 +9,9 @@ set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE amd64) set(CPACK_DEBIAN_PACKAGE_MAINTAINER PaddlePaddle Dev ) set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Paddle") set(CPACK_PACKAGE_DESCRIPTION "") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "libatlas3-base, libgflags2, libgoogle-glog0, libprotobuf8, libpython2.7, libstdc++6, python-numpy, python-pip, python-pip-whl, python-protobuf") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) +set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py index 2defecd178262900c03c1eda60b351dc44629d1f..f1cadaa728ac58107e15f77b5994d31da088caf7 100644 --- a/demo/sentiment/trainer_config.py +++ b/demo/sentiment/trainer_config.py @@ -29,7 +29,7 @@ settings( batch_size=128, learning_rate=2e-3, learning_method=AdamOptimizer(), - average_window=0.5, + model_average=ModelAverage(0.5), regularization=L2Regularization(8e-4), gradient_clipping_threshold=25) diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py index e523a34d5a95120d1f0a583be8bbdbff5678d1ab..3d1f86ec3b7eda4fceaf3a1e406e3d0a1a4a2f60 100644 --- a/demo/seqToseq/seqToseq_net.py +++ b/demo/seqToseq/seqToseq_net.py @@ -69,7 +69,8 @@ def gru_encoder_decoder(data_conf, encoder_size=512, decoder_size=512, beam_size=3, - max_length=250): + max_length=250, + error_clipping=50): """ A wrapper for an attention version of GRU Encoder-Decoder network is_generating: whether this config is used for generating @@ -90,9 +91,19 @@ def gru_encoder_decoder(data_conf, input=src_word_id, size=word_vector_dim, param_attr=ParamAttr(name='_source_language_embedding')) - src_forward = simple_gru(input=src_embedding, size=encoder_size) + src_forward = simple_gru( + input=src_embedding, + size=encoder_size, + naive=True, + gru_layer_attr=ExtraLayerAttribute( + error_clipping_threshold=error_clipping)) src_backward = simple_gru( - input=src_embedding, size=encoder_size, reverse=True) + input=src_embedding, + size=encoder_size, + reverse=True, + naive=True, + gru_layer_attr=ExtraLayerAttribute( + error_clipping_threshold=error_clipping)) encoded_vector = concat_layer(input=[src_forward, src_backward]) 
with mixed_layer(size=decoder_size) as encoded_proj: @@ -117,11 +128,13 @@ def gru_encoder_decoder(data_conf, decoder_inputs += full_matrix_projection(input=context) decoder_inputs += full_matrix_projection(input=current_word) - gru_step = gru_step_layer( + gru_step = gru_step_naive_layer( name='gru_decoder', input=decoder_inputs, output_mem=decoder_mem, - size=decoder_size) + size=decoder_size, + layer_attr=ExtraLayerAttribute( + error_clipping_threshold=error_clipping)) with mixed_layer( size=target_dict_dim, bias_attr=True, diff --git a/doc/getstarted/index_cn.rst b/doc/getstarted/index_cn.rst index c6a4d3121c5857cd434acecb389d68f4d4c7a532..cadf092f8f42ca16bbeb23bd21e0d018af8e43cc 100644 --- a/doc/getstarted/index_cn.rst +++ b/doc/getstarted/index_cn.rst @@ -2,7 +2,8 @@ ============ .. toctree:: - :maxdepth: 2 + :maxdepth: 1 build_and_install/index_cn.rst - basic_usage/index_cn.rst + +- `深度学习入门课程 `_ diff --git a/doc/getstarted/index_en.rst b/doc/getstarted/index_en.rst index 55d95d8015e56ddae3363d19315db0fad841caad..9f771e93e8b63eb98e31ec12667bd1aa007af20e 100644 --- a/doc/getstarted/index_en.rst +++ b/doc/getstarted/index_en.rst @@ -2,7 +2,8 @@ GET STARTED ============ .. toctree:: - :maxdepth: 2 + :maxdepth: 1 build_and_install/index_en.rst - basic_usage/index_en.rst + +- `Deep Learning 101 `_ diff --git a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst index 4b328fc9d38bc5dfec35d5e0f0d46136aeeb41bc..79048e92482851af6c2dd7d055868ebcaa7a298b 100644 --- a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst +++ b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst @@ -19,18 +19,18 @@ 在 PaddlePaddle中,下面这些Layer能够接受双层序列作为输入,完成相应的计算。 -pooling_layer -============== +pooling +======== -pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers_layers_pooling_layer` 配置API。 +pooling 的使用示例如下,详细见 :ref:`api_v2.layer_pooling` 配置API。 .. code-block:: bash - seq_pool = pooling_layer(input=layer, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.EACH_SEQUENCE) + seq_pool = pooling(input=layer, + pooling_type=pooling.Max(), + agg_level=AggregateLevel.EACH_SEQUENCE) -- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。 +- `pooling_type` 目前支持两种,分别是:pooling.Max()和pooling.Avg()。 - `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): @@ -47,7 +47,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers last_seq 和 first_seq ===================== -last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_seq` 类似),详细见 :ref:`api_trainer_config_helpers_layers_last_seq` 配置API。 +last_seq 的使用示例如下( :ref:`api_v2.layer_first_seq` 类似),详细见 :ref:`api_v2.layer_last_seq` 配置API。 .. code-block:: bash @@ -65,16 +65,16 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_ - 输入:必须是一个双层序列 - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。 -expand_layer -============ +expand +====== -expand_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers_layers_expand_layer` 配置API。 +expand 的使用示例如下,详细见 :ref:`api_v2.layer_expand` 配置API。 .. 
code-block:: bash - expand = expand_layer(input=layer1, - expand_as=layer2, - expand_level=ExpandLevel.FROM_TIMESTEP) + ex = expand(input=layer1, + expand_as=layer2, + expand_level=ExpandLevel.FROM_TIMESTEP) - `expand_level=ExpandLevel.FROM_TIMESTEP` 时(默认值): diff --git a/doc/howto/deep_model/rnn/index_cn.rst b/doc/howto/deep_model/rnn/index_cn.rst index 9ecab5594cff47cde4700b7ce0f58013a960a16e..9e805ca85191b793c8798a239927a318c70b96f5 100644 --- a/doc/howto/deep_model/rnn/index_cn.rst +++ b/doc/howto/deep_model/rnn/index_cn.rst @@ -4,7 +4,6 @@ RNN相关模型 .. toctree:: :maxdepth: 1 - rnn_config_cn.rst recurrent_group_cn.md hierarchical_layer_cn.rst hrnn_rnn_api_compare_cn.rst diff --git a/doc/howto/deep_model/rnn/index_en.rst b/doc/howto/deep_model/rnn/index_en.rst index 7adc79873d699fdfd5a85034bcef964dd1f19132..13a153b05c578e0af82ee29db5ea27fd4b6d6f59 100644 --- a/doc/howto/deep_model/rnn/index_en.rst +++ b/doc/howto/deep_model/rnn/index_en.rst @@ -1,7 +1,2 @@ RNN Models ========== - -.. toctree:: - :maxdepth: 1 - - rnn_config_en.rst diff --git a/doc/index_cn.rst b/doc/index_cn.rst index 460fedb5658a8ea9bbe8b602ee2b5df66502fa62..9279bac7f4b2898c18979630a8d6dfcb2dba70e0 100644 --- a/doc/index_cn.rst +++ b/doc/index_cn.rst @@ -5,7 +5,6 @@ PaddlePaddle 文档 :maxdepth: 1 getstarted/index_cn.rst - tutorials/index_cn.md howto/index_cn.rst api/index_cn.rst faq/index_cn.rst diff --git a/doc/index_en.rst b/doc/index_en.rst index 1d9cca7de720ebc23fe816f32d158930d91c07e7..168c7667c61da677905585d6c4b5037ce80b3765 100644 --- a/doc/index_en.rst +++ b/doc/index_en.rst @@ -5,8 +5,6 @@ PaddlePaddle Documentation :maxdepth: 1 getstarted/index_en.rst - tutorials/index_en.md howto/index_en.rst api/index_en.rst about/index_en.rst - \ No newline at end of file diff --git a/doc_theme/templates/layout.html b/doc_theme/templates/layout.html index 034740369ed10a748856e2205d3315f51a7de62f..65e61c5f298e19adc6330c378779a6edf418752e 100644 --- a/doc_theme/templates/layout.html +++ b/doc_theme/templates/layout.html @@ -114,10 +114,7 @@
@@ -137,7 +134,7 @@ {{ toctree }} {% endblock %} - {% if toc %} + {% if False %} {% endif %}
@@ -168,7 +165,8 @@ VERSION:'{{ release|e }}', COLLAPSE_INDEX:false, FILE_SUFFIX:'{{ '' if no_search_suffix else file_suffix }}', - HAS_SOURCE: {{ has_source|lower }} + HAS_SOURCE: {{ has_source|lower }}, + SOURCELINK_SUFFIX: ".txt", }; {%- for scriptfile in script_files %} diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt index a28ccd6f07cfd56b7f1978f67fdcf6e7e5fe6337..f9061e96deb659dcf7bfb88b46e6509af0425199 100755 --- a/paddle/cuda/CMakeLists.txt +++ b/paddle/cuda/CMakeLists.txt @@ -21,16 +21,13 @@ set(CUDA_CXX_WITH_GPU_SOURCES if(WITH_GPU) set(CUDA_CXX_SOURCES - src/hl_dso_loader.cc src/hl_warpctc_wrap.cc ${CUDA_CXX_WITH_GPU_SOURCES}) set_source_files_properties(${CUDA_CXX_SOURCES} PROPERTIES COMPILE_FLAGS "-D__NVCC__") else() - set(CUDA_CXX_SOURCES - src/hl_dso_loader.cc - src/hl_warpctc_wrap.cc) + set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc) endif() set(CUDA_CU_SOURCES @@ -47,7 +44,6 @@ set(CUDA_CU_SOURCES set(CUDA_HEADERS include/hl_time.h - include/hl_dso_loader.h include/hl_warpctc_wrap.h include/hl_sequence.h include/hl_cuda_cublas.h diff --git a/paddle/cuda/include/hl_activation_functions.h b/paddle/cuda/include/hl_activation_functions.h index cdb2dba06cb4123da4be2088e290c6a740e0375b..93957fd9644652c103d15873b732d0b9fa89330f 100644 --- a/paddle/cuda/include/hl_activation_functions.h +++ b/paddle/cuda/include/hl_activation_functions.h @@ -40,18 +40,18 @@ public: namespace gpu { static __device__ Active::forward forward[] = HPPL_ACTIVE_FUNCTION; static __device__ Active::backward backward[] = HPPL_ACTIVE_FUNCTION; -} +} // namespace gpu #else namespace cpu { static Active::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active::backward backward[] = HPPL_ACTIVE_FUNCTION; -} +} // namespace cpu #ifdef __AVX__ namespace avx { static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION; -} +} // namespace avx #endif #endif diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index c5787630abbe105af64888692b1106bd21f4c1e8..f55197c8c9ebb4a0f67ab915abfefd6a45cd13aa 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData, const real ratioW); /** -* @brief Bilinear interpolation backward. -* -* @param[out] inGrad input gradient. -* @param[in] inImgH input image height. -* @param[in] inImgW input image width. -* @param[in] inputH input batchSize. -* @param[in] inputW input image data dim. -* @param[in] outGrad output gradient. -* @param[in] outImgH output image height. -* @param[in] outImgW output image width. -* @param[in] outputH output batchSize. -* @param[in] outputW output image data dim. -* @param[in] numChannels number of channels. -* @param[in] ratioH inImgH / outImgH. -* @param[in] ratioW inImgW / outImgW. -* -*/ + * @brief Bilinear interpolation backward. + * + * @param[out] inGrad input gradient. + * @param[in] inImgH input image height. + * @param[in] inImgW input image width. + * @param[in] inputH input batchSize. + * @param[in] inputW input image data dim. + * @param[in] outGrad output gradient. + * @param[in] outImgH output image height. + * @param[in] outImgW output image width. + * @param[in] outputH output batchSize. + * @param[in] outputW output image data dim. + * @param[in] numChannels number of channels. + * @param[in] ratioH inImgH / outImgH. + * @param[in] ratioW inImgW / outImgW. 
+ * + */ extern void hl_bilinear_backward(real* inGrad, const size_t inImgH, const size_t inImgW, diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc index 182e8ab218cce18448f8a08f5c1a1dab7e38f2b6..6163209e9bc681209712243ba68dec549b7e360a 100644 --- a/paddle/cuda/src/hl_cuda_cublas.cc +++ b/paddle/cuda/src/hl_cuda_cublas.cc @@ -14,10 +14,9 @@ limitations under the License. */ #include "hl_cuda_cublas.h" #include -#include #include "hl_cuda.h" -#include "hl_dso_loader.h" #include "hl_thread.ph" +#include "paddle/utils/DynamicLoader.h" #include "paddle/utils/Logging.h" namespace dynload { diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 6198f067bab2ec790e641e77dce058fe6a52491a..c53a5636829cab9d575f58cc2326cb3efe383e1c 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -15,10 +15,9 @@ limitations under the License. */ #include "hl_cuda_cudnn.h" #include #include -#include #include "hl_cuda_cudnn.ph" -#include "hl_dso_loader.h" #include "hl_thread.ph" +#include "paddle/utils/DynamicLoader.h" #include "paddle/utils/Logging.h" DEFINE_int32(cudnn_conv_workspace_limit_in_mb, diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc index 6dfb12e00b80db36ad2e53326b880c7d1ed59263..4042d9742a92f6718406c8923d9129b81afe89e7 100644 --- a/paddle/cuda/src/hl_cuda_device.cc +++ b/paddle/cuda/src/hl_cuda_device.cc @@ -21,11 +21,10 @@ limitations under the License. */ #include #include #include -#include #include "hl_cuda.ph" #include "hl_thread.ph" -#include "hl_dso_loader.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/DynamicLoader.h" // clang-format on namespace dynload { diff --git a/paddle/cuda/src/hl_warpctc_wrap.cc b/paddle/cuda/src/hl_warpctc_wrap.cc index f57efb2b46797c303d99a5468ad96163a3e74972..9f812dd0dead8b4b4e0a4ac58b12a81d1da00aee 100644 --- a/paddle/cuda/src/hl_warpctc_wrap.cc +++ b/paddle/cuda/src/hl_warpctc_wrap.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "hl_warpctc_wrap.h" #include -#include "hl_dso_loader.h" +#include "paddle/utils/DynamicLoader.h" #include "paddle/utils/Logging.h" namespace dynload { diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1522510e8bb9816cb468fcf406e22560163950cc..233a53709a80f06dd2a06995b159c1aef10e2788 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -12,7 +12,7 @@ endif() add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) - +add_dependencies(paddle_function gen_proto_cpp) if(WITH_GPU) if(WITH_TESTING) diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp index 8748eb0d79fa0fcb0935eac5bb37b44274128aa0..8753057ebf73c99336b2f5d9c610e4aaf293f845 100644 --- a/paddle/function/MulOpTest.cpp +++ b/paddle/function/MulOpTest.cpp @@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) { } /** - * C += A * B, B, C dense, A sparse - * dense = sparse * dense - */ + * C += A * B, B, C dense, A sparse + * dense = sparse * dense + */ void testFuncDSparseDMatrix( size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { real scaleT = 1.0; @@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) { } /** - * C += A * B, A, C dense, B sparse - * dense = dense * sparse - */ + * C += A * B, A, C dense, B sparse + * dense = dense * sparse + */ void testFuncDDSparseMatrix( size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { real scaleT = 1.0; @@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) { } /** - * C += A * B, A sparse, B, C dense - * sparse = dense * dense - */ + * C += A * B, A sparse, B, C dense + * sparse = dense * dense + */ void testFuncSparseDDMatrix( size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { real scaleT = 1.0; diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index 3eb87d9b85c8207a23046fdb4bda06ba8185e2a3..b44e4dc202f01956ed21c175aa897ced8e92546b 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include "MultiGradientMachine.h" #include "MultiNetwork.h" #include "NeuralNetwork.h" -#include "NeuralNetwork.h" #include "ParallelNeuralNetwork.h" #include "hl_gpu.h" diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 2ab964b8fc2e080282aa03db4ee6836540e666d7..01158d1dce8d711c67b1ecf29bb644e42ccf6ff5 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() { /* create scattered id infomation for all realLayer of inFrameLines one time. * If hasSubseq, will also create scattered sequenceStartPositions infomation * for all realLayer of inFrameLines one time. 
-*/
+ */
 void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                  const Argument& input,
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index 910ca4376bedeac31674c71b9ea1205ef769cda9..c2bc52709ab42bbe21dcc3951f23f2e0b5e6793d 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -107,18 +107,18 @@ public:
       DropCallback;

   /**
-   * @brief NormOrDropNodeCallback
-   *
-   * Normalize a path's probabilities or just drop it by modifying path.logProb
-   *
-   * The first parameter is sequence index in a batch
-   *
-   * The second parameter is path.ids
-   *
-   * The third parameter is probabilites for each node in this path.
-   *
-   * The fourth parameter is the probability of the whole path.
-   */
+   * @brief NormOrDropNodeCallback
+   *
+   * Normalize a path's probabilities or just drop it by modifying path.logProb
+   *
+   * The first parameter is the sequence index in a batch
+   *
+   * The second parameter is path.ids
+   *
+   * The third parameter is the probabilities for each node in this path.
+   *
+   * The fourth parameter is the probability of the whole path.
+   */
   typedef std::function<void(int, std::vector<int>&, std::vector<real>&, real*)>
       NormOrDropNodeCallback;
@@ -348,9 +348,9 @@ protected:
   int targetInfoInlinkId_;

   /* create scattered id infomation for all realLayer of inFrameLines one time.
-   * If hasSubseq, will also create scattered sequenceStartPositions infomation
-   * for all realLayer of inFrameLines one time.
-   */
+   * If hasSubseq, will also create scattered sequenceStartPositions information
+   * for all realLayer of inFrameLines one time.
+   */
   void createInFrameInfo(int inlinks_id,
                          const Argument& input,
                          PassType passType);
diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h
index 47182c9ecc695f4d79089d06d6a1a61b878ce409..0ed482889d0cea884db3759620088575c5b10201 100644
--- a/paddle/gserver/layers/Layer.h
+++ b/paddle/gserver/layers/Layer.h
@@ -106,9 +106,9 @@ protected:
 public:
   /**
-   * Wait until all input value ready.
-   * Called before Layer::forward() function.
-   */
+   * Wait until all input values are ready.
+   * Called before the Layer::forward() function.
+   */
   virtual void waitInputValue();

   /**
@@ -118,9 +118,9 @@ public:
   virtual void copyOutputToOtherDevice();

   /**
-   * Wait until all output grad ready and merge them to output_.grad.
-   * Called before Layer::backward() function.
-   */
+   * Wait until all output grads are ready and merge them into output_.grad.
+   * Called before the Layer::backward() function.
+   */
   virtual void waitAndMergeOutputGrad();

   /**
diff --git a/paddle/gserver/layers/RotateLayer.h b/paddle/gserver/layers/RotateLayer.h
index 1a64d4d5a51d9c04df07861f02f1bb91eaec088e..d05c2065cb1cb81452c54ee1858c34cb46e6c7f6 100644
--- a/paddle/gserver/layers/RotateLayer.h
+++ b/paddle/gserver/layers/RotateLayer.h
@@ -29,7 +29,7 @@ namespace paddle {
 *
 * The config file api is rotate_layer
 *
-*/
+ */

 class RotateLayer : public Layer {
 public:
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 8c49502011582b534a2ba4113ffeffaa2f06a51c..235d9a9b0f0653df5c0b671092df9e195f08fc48 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
    * thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this * case, we should compute the new sequenceStartPositions. - */ + */ if (type_) { CHECK(input.subSequenceStartPositions) << "when trans_type = seq, input must hasSubseq"; diff --git a/paddle/gserver/tests/sequence_layer_group.conf b/paddle/gserver/tests/sequence_layer_group.conf index 68d150d553588c864de56ce1e6f283cc42fbbf2f..50f2d89d0271b2eaa460e57636eb09b6d6aeda18 100644 --- a/paddle/gserver/tests/sequence_layer_group.conf +++ b/paddle/gserver/tests/sequence_layer_group.conf @@ -48,8 +48,7 @@ lstm = lstmemory_group( size=hidden_dim, act=TanhActivation(), gate_act=SigmoidActivation(), - state_act=TanhActivation(), - lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) + state_act=TanhActivation()) lstm_last = last_seq(input=lstm) diff --git a/paddle/gserver/tests/sequence_nest_layer_group.conf b/paddle/gserver/tests/sequence_nest_layer_group.conf index 88cb42798baff79fa6a86ef11dabf1781575c0b4..c01b95f7a29ae73c2b3ccd5b56ad1d316cbc72ec 100644 --- a/paddle/gserver/tests/sequence_nest_layer_group.conf +++ b/paddle/gserver/tests/sequence_nest_layer_group.conf @@ -51,8 +51,7 @@ def lstm_group(lstm_group_input): size=hidden_dim, act=TanhActivation(), gate_act=SigmoidActivation(), - state_act=TanhActivation(), - lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) + state_act=TanhActivation()) return lstm_output diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 6203cd3b9ab9f95853cd3c46750fd55d6dfbba4a..178fce5b0a97442173a035fe85bdaddabba7da17 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -15,6 +15,54 @@ limitations under the License. */ #include "MathFunctions.h" #include "hl_matrix_apply.cuh" #include "hl_matrix_ops.cuh" +#include "paddle/utils/DynamicLoader.h" + +namespace dynload { + +std::once_flag lapack_dso_flag; +void* lapack_dso_handle = nullptr; + +/** + * The following macro definition can generate structs + * (for each function) to dynamic load lapack routine + * via operator overloading. + * + * note: default dynamic linked libs + */ +#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... 
args) -> decltype(__name(args...)) { \ + using lapack_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \ + void* p_##__name = dlsym(lapack_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + } __name; // struct DynLoad__##__name + +// clang-format off +#ifdef PADDLE_USE_ATLAS + #define PADDLE_SGETRF clapack_sgetrf + #define PADDLE_DGETRF clapack_dgetrf + #define PADDLE_SGETRI clapack_sgetri + #define PADDLE_DGETRI clapack_dgetri +#else + #define PADDLE_SGETRF LAPACKE_sgetrf + #define PADDLE_DGETRF LAPACKE_dgetrf + #define PADDLE_SGETRI LAPACKE_sgetri + #define PADDLE_DGETRI LAPACKE_dgetri +#endif + +#define LAPACK_ROUTINE_EACH(__macro) \ + __macro(PADDLE_SGETRF) \ + __macro(PADDLE_DGETRF) \ + __macro(PADDLE_SGETRI) \ + __macro(PADDLE_DGETRI) +// clang-format on + +LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) + +} // namespace dynload namespace paddle { @@ -85,16 +133,7 @@ int getrf(const CBLAS_ORDER order, float* A, const int lda, int* ipiv) { -#ifdef PADDLE_USE_LAPACK -#ifdef PADDLE_USE_ATLAS - return clapack_sgetrf(order, M, N, A, lda, ipiv); -#else - return LAPACKE_sgetrf(order, M, N, A, lda, ipiv); -#endif -#else - LOG(FATAL) << "Not implemented"; -#endif - return 0; + return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv); } template <> @@ -104,16 +143,7 @@ int getrf(const CBLAS_ORDER order, double* A, const int lda, int* ipiv) { -#ifdef PADDLE_USE_LAPACK -#ifdef PADDLE_USE_ATLAS - return clapack_dgetrf(order, M, N, A, lda, ipiv); -#else - return LAPACKE_dgetrf(order, M, N, A, lda, ipiv); -#endif -#else - LOG(FATAL) << "Not implemented"; -#endif - return 0; + return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv); } template <> @@ -122,16 +152,7 @@ int getri(const CBLAS_ORDER order, float* A, const int lda, const int* ipiv) { -#ifdef PADDLE_USE_LAPACK -#ifdef PADDLE_USE_ATLAS - return clapack_sgetri(order, N, A, lda, ipiv); -#else - return LAPACKE_sgetri(order, N, A, lda, ipiv); -#endif -#else - LOG(FATAL) << "Not implemented"; -#endif - return 0; + return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv); } template <> @@ -140,15 +161,7 @@ int getri(const CBLAS_ORDER order, double* A, const int lda, const int* ipiv) { -#ifdef PADDLE_USE_LAPACK -#ifdef PADDLE_USE_ATLAS - return clapack_dgetri(order, N, A, lda, ipiv); -#else - return LAPACKE_dgetri(order, N, A, lda, ipiv); -#endif -#else - LOG(FATAL) << "Not implemented"; -#endif + return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); return 0; } diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 9f8f84a87c5e60b2a6573844f251c42152d8156b..c8559eefd8378450fc18c2ba821c65b39c8cc046 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -17,14 +17,11 @@ limitations under the License. */ #ifdef PADDLE_USE_MKL #include -#ifdef PADDLE_USE_LAPACK #include -#endif #else extern "C" { #include } -#ifdef PADDLE_USE_LAPACK #ifdef PADDLE_USE_ATLAS extern "C" { #include @@ -33,7 +30,6 @@ extern "C" { #include #endif #endif -#endif #include diff --git a/paddle/math/tests/TestUtils.h b/paddle/math/tests/TestUtils.h index c3020961880484a7944f8cc61377a4f08122e403..713f407f496099c04e5834b2bdcf7b1cf5a86a3a 100644 --- a/paddle/math/tests/TestUtils.h +++ b/paddle/math/tests/TestUtils.h @@ -37,7 +37,7 @@ limitations under the License. 
*/
 *
 * AutoCompare test;
 * test.cmpWithoutArg(function, height, width)
-*/
+ */

#include 
#include "TensorCheck.h"
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index 5210fe3fa1f3e221d7025edbc8a511d74ddaed51..3b1b0065af38d72716194787471889e69e719b9e 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/testing/TestUtil.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/Util.h"

@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
     testMatrixTranspose(height, width);
     testMatrixRotate(height, width);
   }
-// inverse
-#ifdef PADDLE_USE_LAPACK
-  testMatrixInverse(height);
-#endif
+  // inverse matrix
+  void* dso_handle = nullptr;
+  GetLapackDsoHandle(&dso_handle);
+  if (nullptr == dso_handle) {
+    LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
+                    "using the --lapack_dir flag or LD_LIBRARY_PATH.";
+  } else {
+    testMatrixInverse(height);
+  }
 }
 }
diff --git a/paddle/parameter/FirstOrderOptimizer.h b/paddle/parameter/FirstOrderOptimizer.h
index 095019b74f4f667991a0d4c5d5511e371889539f..caa78acd98ea4b35fc69643689cfce23026275e0 100644
--- a/paddle/parameter/FirstOrderOptimizer.h
+++ b/paddle/parameter/FirstOrderOptimizer.h
@@ -126,7 +126,7 @@ protected:
 /*
  * AdaDelta Optimization.
  * http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf
-*/
+ */
 class AdaDeltaParameterOptimizer : public ParameterOptimizer {
 public:
   explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig)
diff --git a/paddle/scripts/deb/postinst b/paddle/scripts/deb/postinst
new file mode 100644
index 0000000000000000000000000000000000000000..91620b1ee7569cd17927f44112dfa9279ddbdd32
--- /dev/null
+++ b/paddle/scripts/deb/postinst
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+echo "Post install paddle debian package."
+echo "Installing the python packages used by paddle. To reinstall them later, run:"
+echo "    pip install /usr/opt/paddle/share/wheels/*.whl"
+find /usr/ -name '*paddle*.whl' | xargs pip install
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index a750c436dc50f906a35313490f667d9a24cc0c00..4172063d923f939dac7229573bc087ec8c62b844 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -5,13 +5,8 @@ set -e
 # Set BASE_IMAGE according to env variables
 if [ ${WITH_GPU} == "ON" ]; then
   BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu14.04"
-  # additional packages to install when building gpu images
-  GPU_DOCKER_PKG="python-pip python-dev"
 else
-  BASE_IMAGE="python:2.7.13-slim"
-  # FIXME: python base image uses different python version than WITH_GPU
-  # need to change PYTHONHOME to /usr/local when using python base image
-  CPU_DOCKER_PYTHON_HOME_ENV="ENV PYTHONHOME /usr/local"
+  BASE_IMAGE="ubuntu:14.04"
 fi

 DOCKERFILE_GPU_ENV=""
@@ -66,10 +61,7 @@ if [ ${WITH_DOC} == "ON" ]; then
   rm -rf /paddle/build_doc
 fi
 # generate deb package for current build
-# FIXME(typhoonzero): should we remove paddle/scripts/deb ?
-# FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must
-# install them in docker
-cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" ..
+cpack -D CPACK_GENERATOR='DEB' ..
if [[ ${WOBOQ:-OFF} == 'ON' ]]; then apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev @@ -97,32 +89,30 @@ fi paddle version -if [[ -n ${APT_MIRROR} ]]; then - MIRROR_UPDATE="sed -i '${APT_MIRROR}' /etc/apt/sources.list && \\" -else - MIRROR_UPDATE="\\" -fi - cat > /paddle/build/Dockerfile < ENV HOME /root ENV LANG en_US.UTF-8 # Use Fix locales to en_US.UTF-8 -RUN ${MIRROR_UPDATE} - apt-get update && \ - apt-get install -y libgfortran3 libpython2.7 ${GPU_DOCKER_PKG} && \ - apt-get clean -y && \ - pip install --upgrade pip && \ - pip install -U 'protobuf==3.1.0' requests numpy +EOF + +if [[ -n ${APT_MIRROR} ]]; then +cat >> /paddle/build/Dockerfile <> /paddle/build/Dockerfile <=3.2.2', 'numpy>=1.8.0', # The numpy is required. - 'protobuf>=${PROTOBUF_VERSION}' # The paddle protobuf version + 'protobuf==${PROTOBUF_VERSION}' # The paddle protobuf version ], url='http://www.paddlepaddle.org/', license='Apache 2.0', diff --git a/paddle/trainer/tests/picojson.h b/paddle/trainer/tests/picojson.h index 23bfa164080a6ea392bb6ee15e7e2bec25257ce9..4aa64961d096ce94a4187fe94000b05de4080122 100644 --- a/paddle/trainer/tests/picojson.h +++ b/paddle/trainer/tests/picojson.h @@ -1059,14 +1059,14 @@ inline bool operator==(const value& x, const value& y) { } inline bool operator!=(const value& x, const value& y) { return !(x == y); } -} +} // namespace picojson namespace std { template <> inline void swap(picojson::value& x, picojson::value& y) { x.swap(y); } -} +} // namespace std inline std::istream& operator>>(std::istream& is, picojson::value& x) { picojson::set_last_error(std::string()); diff --git a/paddle/cuda/src/hl_dso_loader.cc b/paddle/utils/DynamicLoader.cpp similarity index 94% rename from paddle/cuda/src/hl_dso_loader.cc rename to paddle/utils/DynamicLoader.cpp index 53164dd27c7c5f5254e743b6fcf1d7b6fc895e31..368c35e15186d4d01f939dd4e4c05e7cac3dd214 100644 --- a/paddle/cuda/src/hl_dso_loader.cc +++ b/paddle/utils/DynamicLoader.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "hl_dso_loader.h" +#include "DynamicLoader.h" #include -#include "paddle/utils/Logging.h" +#include "Logging.h" DEFINE_string(cudnn_dir, "", @@ -30,6 +30,8 @@ DEFINE_string(cuda_dir, DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); +DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); + static inline std::string join(const std::string& part1, const std::string& part2) { // directory separator @@ -160,3 +162,11 @@ void GetWarpCTCDsoHandle(void** dso_handle) { GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); #endif } + +void GetLapackDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapack.so", dso_handle); +#endif +} diff --git a/paddle/cuda/include/hl_dso_loader.h b/paddle/utils/DynamicLoader.h similarity index 83% rename from paddle/cuda/include/hl_dso_loader.h rename to paddle/utils/DynamicLoader.h index 276a07d3c735c771c851e8b4bd14c720f9ab6569..9b5ad21724afd7176f958619e7e10d12dc08fa49 100644 --- a/paddle/cuda/include/hl_dso_loader.h +++ b/paddle/utils/DynamicLoader.h @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and
limitations under the License. */

-#ifndef HL_DSO_LOADER_H_
-#define HL_DSO_LOADER_H_
+#ifndef DYNAMIC_LOAD_H_
+#define DYNAMIC_LOAD_H_

 #include 
 #include 
+#include 
 #include 
-#include "hl_base.h"

 /**
  * @brief    load the DSO of CUBLAS
 *
 * @param    **dso_handle   dso handler
 *
 */
 void GetCurandDsoHandle(void** dso_handle);

 */
 void GetWarpCTCDsoHandle(void** dso_handle);

-#endif  // HL_DSO_LOADER_H_
+/**
+ * @brief    load the DSO of lapack
+ *
+ * @param    **dso_handle   dso handler
+ *
+ */
+void GetLapackDsoHandle(void** dso_handle);
+
+#endif  // DYNAMIC_LOAD_H_
diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py
index 41ad05c3eb467b9a2e09315f980410d5e9b3853f..7ae9e5cb3050fa6f70fa84785a1ddbdc68c70235 100644
--- a/python/paddle/trainer_config_helpers/attrs.py
+++ b/python/paddle/trainer_config_helpers/attrs.py
@@ -208,12 +208,15 @@ class ExtraLayerAttribute(object):
                  drop_rate=None,
                  device=None):
         self.attr = dict()
-        if isinstance(error_clipping_threshold, float):
-            assert error_clipping_threshold > 0
-            self.attr["error_clipping_threshold"] = error_clipping_threshold
-
-        if isinstance(drop_rate, float):
-            assert drop_rate > 0
+        if error_clipping_threshold is not None:
+            error_clipping_threshold = float(error_clipping_threshold)
+            if error_clipping_threshold < 0:
+                raise ValueError("error_clipping_threshold must be >= 0")
+            self.attr['error_clipping_threshold'] = error_clipping_threshold
+        if drop_rate is not None:
+            drop_rate = float(drop_rate)
+            if drop_rate < 0:
+                raise ValueError("drop_rate must be >= 0")
             self.attr["drop_rate"] = drop_rate

         if isinstance(device, int):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 46fd752d527fa63578a1e01865356780955bc87a..31652613fb3a55636b32babbc4bde60d65776c61 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -84,6 +84,7 @@ __all__ = [
     'GeneratedInput',
     'SubsequenceInput',
     'gru_step_layer',
+    'gru_step_naive_layer',
     'recurrent_layer',
     'BaseGeneratedInput',
     'conv_operator',
@@ -2286,7 +2287,7 @@ def img_pool_layer(input,

     type_name = pool_type.name + '-projection' \
         if (
-        isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
+            isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
         else pool_type.name

     pool_size_y = pool_size if pool_size_y is None else pool_size_y
@@ -3086,6 +3087,78 @@ def gru_step_layer(input,
         activation=act)

+
+@wrap_bias_attr_default()
+@wrap_param_attr_default()
+@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
+@wrap_act_default(act=TanhActivation())
+@wrap_name_default('gru_step_naive')
+@layer_support(ERROR_CLIPPING, DROPOUT)
+def gru_step_naive_layer(input,
+                         output_mem,
+                         size=None,
+                         name=None,
+                         act=None,
+                         gate_act=None,
+                         bias_attr=None,
+                         param_attr=None,
+                         layer_attr=None):
+    """
+    GRU Step Layer, built with MixedLayer so that it supports ERROR_CLIPPING
+    and DROPOUT.
+
+    :param input: the concatenated projections of the three gates, whose
+        width must be three times ``size``.
+    :param output_mem: the memory of the GRU output from the previous step.
+    :param size: the dimension of the GRU state.
+    :param name: the layer name.
+    :param act: activation of the candidate hidden state.
+    :param gate_act: activation of the update and reset gates.
+    :param bias_attr: the bias attribute.
+    :param param_attr: the parameter attribute.
+    :param layer_attr: extra layer attribute, e.g. error clipping or dropout.
+    :return: the GRU step output layer.
+    """
+    if input.size % 3 != 0:
+        raise ValueError("GruStep input size must be divisible by 3")
+    if size is None:
+        size = input.size / 3
+
+    def __gate__(gate_name, offset):
+        with mixed_layer(
+                name=name + "_" + gate_name,
+                size=size,
+                layer_attr=layer_attr,
+                bias_attr=bias_attr,
+                act=gate_act) as gate:
+            gate += identity_projection(input=input, offset=offset)
+            gate += full_matrix_projection(
+                input=output_mem, param_attr=param_attr)
+        return gate
+
+    update_gate = __gate__("update", 0)
+    reset_gate = __gate__("reset", size)
+
+    with mixed_layer(
+            name=name + "_reset_output", bias_attr=False) as reset_output:
+        reset_output += dotmul_operator(a=output_mem, b=reset_gate)
+
+    with mixed_layer(
+            name=name + "_output_candidate",
+            size=size,
+            layer_attr=layer_attr,
+            bias_attr=bias_attr,
+            act=act) as output_candidate:
+        output_candidate += identity_projection(input=input, offset=2 * size)
+        output_candidate += full_matrix_projection(
+            input=reset_output, param_attr=param_attr)
+
+    with mixed_layer(name=name) as output:
+        output += identity_projection(output_mem)
+        output += dotmul_operator(a=output_mem, b=update_gate, scale=-1.0)
+        output += dotmul_operator(a=output_candidate, b=update_gate)
+
+    return output
+
+
 @wrap_name_default()
 @layer_support()
 def get_output_layer(input, arg_name, name=None, layer_attr=None):
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index cadde11ff81658cb309cd1bf7a44bac6374c1e44..fb533a47e0b0585be6f0e019086993f8b3aa7f38 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -825,7 +825,8 @@ def gru_unit(input,
              gru_param_attr=None,
              act=None,
              gate_act=None,
-             gru_layer_attr=None):
+             gru_layer_attr=None,
+             naive=False):
     """
     Define calculations that a gated recurrent unit performs in a single time
     step. This function itself is not a recurrent layer, so that it can not be
@@ -857,7 +858,12 @@ def gru_unit(input,

     out_mem = memory(name=name, size=size)

-    gru_out = gru_step_layer(
+    if naive:
+        __step__ = gru_step_naive_layer
+    else:
+        __step__ = gru_step_layer
+
+    gru_out = __step__(
         name=name,
         input=input,
         output_mem=out_mem,
@@ -879,7 +885,8 @@ def gru_group(input,
               gru_param_attr=None,
               act=None,
               gate_act=None,
-              gru_layer_attr=None):
+              gru_layer_attr=None,
+              naive=False):
     """
     gru_group is a recurrent layer group version of Gated Recurrent Unit. It
     does exactly the same calculation as the grumemory layer does. A promising
@@ -928,7 +935,8 @@ def gru_group(input,
         gru_param_attr=gru_param_attr,
         act=act,
         gate_act=gate_act,
-        gru_layer_attr=gru_layer_attr)
+        gru_layer_attr=gru_layer_attr,
+        naive=naive)

     return recurrent_group(
         name='%s_recurrent_group' % name,
@@ -949,7 +957,8 @@ def simple_gru(input,
               gru_param_attr=None,
               act=None,
               gate_act=None,
-              gru_layer_attr=None):
+              gru_layer_attr=None,
+              naive=False):
     """
     You maybe see gru_step_layer, grumemory in layers.py, gru_unit, gru_group,
     simple_gru in network.py.
The reason why there are so many interfaces is @@ -1018,7 +1027,8 @@ def simple_gru(input, gru_param_attr=gru_param_attr, act=act, gate_act=gate_act, - gru_layer_attr=gru_layer_attr) + gru_layer_attr=gru_layer_attr, + naive=naive) @wrap_name_default('simple_gru2') diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr index 2afc3afef6d39ce9b8eef05948861284775d5011..d8bd7b9dfb71a392d0dc53872a0d72f47530530f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr @@ -320,6 +320,7 @@ layers { } } drop_rate: 0.5 + error_clipping_threshold: 40.0 } parameters { name: "___embedding_0__.w0" diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 384de9b9d57f88e84ab6067846174bb037502dc0..89cca7acd34b8dea0572169338649b5e9ff6536a 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -356,6 +356,9 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +mixed.__doc__ = conf_helps.mixed_layer.__doc__ + + class RecurrentLayerInput(Layer): def __init__(self, recurrent_name, index, parent_layers): parents_len = len(parent_layers) @@ -404,6 +407,8 @@ data.__name__ = 'data' AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel memory = MemoryV2 +memory.__name__ = 'memory' +memory.__doc__ = conf_helps.memory.__doc__ def __layer_name_mapping__(inname): @@ -512,6 +517,9 @@ def recurrent_group(step, input, name=None): return retv +recurrent_group.__doc__ = conf_helps.recurrent_group.__doc__ + + @wrap_name_default() def beam_search(step, input, @@ -579,6 +587,8 @@ def beam_search(step, return tmp +beam_search.__doc__ = conf_helps.beam_search.__doc__ + __projection_names__ = filter(lambda x: x.endswith('_projection'), dir(conf_helps)) diff --git a/python/setup.py.in b/python/setup.py.in index 228e762d56f672d7b31ede2b2b92c77f9a126f3c..5dfb46192ae54fdc36b0867312cf156aefb84f84 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -15,6 +15,9 @@ setup(name='paddle', description='Parallel Distributed Deep Learning', install_requires=[ "requests", + "numpy", + "protobuf==${PROTOBUF_VERSION}", + "matplotlib", ], packages=packages, package_dir={
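Closing sketch: how a configuration opts into the `gru_step_naive_layer` path added above. The `naive=True` flag threads from `simple_gru` through `gru_group` and `gru_unit`, and the pattern below mirrors the seqToseq demo changes earlier in this patch; the data and embedding sizes are placeholders.

```python
from paddle.trainer_config_helpers import *

# Placeholder input: a 10000-word vocabulary embedded into 512 dimensions.
src = embedding_layer(input=data_layer(name='word', size=10000), size=512)

# naive=True makes gru_unit call gru_step_naive_layer, which is built from
# mixed_layer and therefore honors ExtraLayerAttribute settings such as
# error clipping and dropout that the fused gru_step_layer does not support.
encoder = simple_gru(
    input=src,
    size=512,
    naive=True,
    gru_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
```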